Diffstat (limited to 'llvm/lib/Target/Hexagon')
43 files changed, 3671 insertions, 719 deletions
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index b94b148..c18db98 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -463,7 +463,7 @@ void HexagonOperand::print(raw_ostream &OS, const MCAsmInfo &MAI) const { break; case Register: OS << "<register R"; - OS << getReg() << ">"; + OS << getReg().id() << ">"; break; case Token: OS << "'" << getToken() << "'"; diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index 1a5f096..eddab5a 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -37,6 +37,8 @@ add_llvm_target(HexagonCodeGen HexagonGenMemAbsolute.cpp HexagonGenMux.cpp HexagonGenPredicate.cpp + HexagonGenWideningVecFloatInstr.cpp + HexagonGenWideningVecInstr.cpp HexagonHardwareLoops.cpp HexagonHazardRecognizer.cpp HexagonInstrInfo.cpp @@ -53,6 +55,7 @@ add_llvm_target(HexagonCodeGen HexagonNewValueJump.cpp HexagonOptAddrMode.cpp HexagonOptimizeSZextends.cpp + HexagonOptShuffleVector.cpp HexagonPeephole.cpp HexagonQFPOptimizer.cpp HexagonRDFOpt.cpp diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h index 422ab20..b98369d 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.h +++ b/llvm/lib/Target/Hexagon/Hexagon.h @@ -92,6 +92,9 @@ FunctionPass *createHexagonGenInsert(); FunctionPass *createHexagonGenMemAbsolute(); FunctionPass *createHexagonGenMux(); FunctionPass *createHexagonGenPredicate(); +FunctionPass * +createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &); +FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOptLevel OptLevel); @@ -102,6 +105,7 @@ FunctionPass *createHexagonMergeActivateWeight(); FunctionPass *createHexagonNewValueJump(); FunctionPass *createHexagonOptAddrMode(); FunctionPass *createHexagonOptimizeSZextends(); +FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &); FunctionPass *createHexagonPacketizer(bool Minimal); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonRDFOpt(); diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index ede8463..17c72c3 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -413,6 +413,8 @@ include "HexagonPatternsV65.td" include "HexagonDepMappings.td" include "HexagonIntrinsics.td" +defm : RemapAllTargetPseudoPointerOperands<IntRegs>; + def HexagonInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 68f5312..8483374 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -137,8 +137,7 @@ namespace { return !Bits.any(); } bool includes(const RegisterSet &Rs) const { - // A.test(B) <=> A-B != {} - return !Rs.Bits.test(Bits); + return Rs.Bits.subsetOf(Bits); } bool intersects(const RegisterSet &Rs) const { return Bits.anyCommon(Rs.Bits); @@ -1796,7 +1795,7 @@ namespace { const MachineDominatorTree &MDT; const HexagonInstrInfo &HII; - const HexagonRegisterInfo &HRI; + [[maybe_unused]] const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; BitTracker &BT; }; @@ -1886,7 +1885,7 @@ bool 
BitSimplification::matchHalf(unsigned SelfR, bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum) { - auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI); + auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum); auto *RRC = HBS::getFinalVRegClass(R, MRI); return OpRC->hasSubClassEq(RRC); } diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp index eca5ac1..bae3484 100644 --- a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -24,7 +24,6 @@ #include <cstdint> #include <iterator> #include <map> -#include <utility> using namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/llvm/lib/Target/Hexagon/HexagonCallingConv.td index dceb70c..80adde8 100644 --- a/llvm/lib/Target/Hexagon/HexagonCallingConv.td +++ b/llvm/lib/Target/Hexagon/HexagonCallingConv.td @@ -25,6 +25,8 @@ def CC_HexagonStack: CallingConv<[ def CC_Hexagon_Legacy: CallingConv<[ CCIfType<[i1,i8,i16], CCPromoteToType<i32>>, + CCIfType<[bf16], + CCBitConvertToType<i32>>, CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[f64], @@ -55,6 +57,8 @@ def CC_Hexagon_Legacy: CallingConv<[ def CC_Hexagon: CallingConv<[ CCIfType<[i1,i8,i16], CCPromoteToType<i32>>, + CCIfType<[bf16], + CCBitConvertToType<i32>>, CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[f64], @@ -88,6 +92,8 @@ def CC_Hexagon: CallingConv<[ def RetCC_Hexagon: CallingConv<[ CCIfType<[i1,i8,i16], CCPromoteToType<i32>>, + CCIfType<[bf16], + CCBitConvertToType<i32>>, CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[f64], @@ -149,16 +155,16 @@ def CC_Hexagon_HVX: CallingConv<[ CCIfType<[v128i1], CCPromoteToType<v128i8>>>, CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16,v64bf16], CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16,v128bf16], CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>, CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16,v64bf16], CCAssignToStack<128,128>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16,v64bf16], CCAssignToStack<256,128>>>, CCDelegateTo<CC_Hexagon> @@ -175,10 +181,10 @@ def RetCC_Hexagon_HVX: CallingConv<[ // HVX 128-byte mode CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16,v64bf16], CCAssignToReg<[V0]>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16,v128bf16], CCAssignToReg<[W0]>>>, CCDelegateTo<RetCC_Hexagon> diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index dd343d9..df61226 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1405,7 +1405,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); int FI = I.getFrameIdx(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); - HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI, Register()); + HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, Register()); if (IsKill) MBB.addLiveIn(Reg); } @@ -1470,7 +1470,7 @@ bool 
HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MCRegister Reg = I.getReg(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); int FI = I.getFrameIdx(); - HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI, Register()); + HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, Register()); } return true; @@ -1814,8 +1814,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, .addReg(SrcR, getKillRegState(IsKill)) .addReg(TmpR0, RegState::Kill); - auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo(); - HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI, Register()); + HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, Register()); expandStoreVec(B, std::prev(It), MRI, HII, NewRegs); NewRegs.push_back(TmpR0); @@ -1844,9 +1843,7 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) .addImm(0x01010101); - MachineFunction &MF = *B.getParent(); - auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); - HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI, Register()); + HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, Register()); expandLoadVec(B, std::prev(It), MRI, HII, NewRegs); BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR) @@ -2225,7 +2222,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, if (!Bad) { // If the addressing mode is ok, check the register class. unsigned OpNum = Load ? 0 : 2; - auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI); + auto *RC = HII.getRegClass(In.getDesc(), OpNum); RC = getCommonRC(SI.RC, RC); if (RC == nullptr) Bad = true; @@ -2395,7 +2392,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(), SrcOp.getSubReg() }; - auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI); + auto *RC = HII.getRegClass(SI.getDesc(), 2); // The this-> is needed to unconfuse MSVC. 
Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC); LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index ff876f6..18fcd6a 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -153,8 +153,7 @@ namespace { return !BitVector::any(); } bool includes(const RegisterSet &Rs) const { - // A.BitVector::test(B) <=> A-B != {} - return !Rs.BitVector::test(*this); + return Rs.BitVector::subsetOf(*this); } bool intersects(const RegisterSet &Rs) const { return BitVector::anyCommon(Rs); diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp index 74e5abe..c6fffde 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -43,7 +43,6 @@ #include <cassert> #include <iterator> #include <limits> -#include <utility> #define DEBUG_TYPE "hexmux" diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index 9c81e963..412d587 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -30,7 +30,6 @@ #include <cassert> #include <iterator> #include <queue> -#include <utility> #define DEBUG_TYPE "gen-pred" @@ -52,8 +51,7 @@ private: }; [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, - const PrintRegister &PR); -raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) { + const PrintRegister &PR) { return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); } diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp new file mode 100644 index 0000000..7271f1f --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp @@ -0,0 +1,565 @@ +//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replace widening vector float operations with hexagon intrinsics. +// +//===----------------------------------------------------------------------===// +// +// Brief overview of working of GenWideningVecFloatInstr pass. +// This version of pass is replica of already existing pass(which will replace +// widen vector integer operations with it's respective intrinsics). In this +// pass we will generate hexagon intrinsics for widen vector float instructions. +// +// Example1(64 vector-width widening): +// %wide.load = load <64 x half>, <64 x half>* %0, align 2 +// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2 +// %1 = fpext <64 x half> %wide.load to <64 x float> +// %3 = fpext <64 x half> %wide.load53 to <64 x float> +// %4 = fmul <64 x float> %1, %3 +// +// If we run this pass on the above example, it will first find fmul +// instruction, and then it will check whether the operands of fmul instruction +// (%1 and %3) belongs to either of these categories [%1 ->fpext, %3 ->fpext] +// or [%1 ->fpext, %3 ->constant_vector] or [%1 ->constant_vector, %3 ->fpext]. +// If it sees such pattern, then this pass will replace such pattern with +// appropriate hexagon intrinsics. 
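// For illustration (value names are hypothetical), the constant-vector case
// mentioned above looks like:
//   %1 = fpext <64 x half> %wide.load to <64 x float>
//   %4 = fmul <64 x float> %1, splat (float 2.0)
// Here %1 falls in the fpext category and the splat in the constant_vector
// category; the constant is accepted only if every lane can be narrowed back
// to half precision (see isExtendedConstant below).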
+// +// After replacement: +// %wide.load = load <64 x half>, <64 x half>* %0, align 2 +// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2 +// %3 = bitcast <64 x half> %wide.load to <32 x i32> +// %4 = bitcast <64 x half> %wide.load53 to <32 x i32> +// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4) +// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1 +// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6) +// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6) +// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7) +// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8) +// %11 = bitcast <32 x i32> %9 to <32 x float> +// %12 = bitcast <32 x i32> %10 to <32 x float> +// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2 +// +// +// +// Example2(128 vector-width widening): +// %0 = bitcast half* %a to <128 x half>* +// %wide.load = load <128 x half>, <128 x half>* %0, align 2 +// %1 = fpext <128 x half> %wide.load to <128 x float> +// %2 = bitcast half* %b to <128 x half>* +// %wide.load2 = load <128 x half>, <128 x half>* %2, align 2 +// %3 = fpext <128 x half> %wide.load2 to <128 x float> +// %4 = fmul <128 x float> %1, %3 +// +// After replacement: +// %0 = bitcast half* %a to <128 x half>* +// %wide.load = load <128 x half>, <128 x half>* %0, align 2 +// %1 = bitcast half* %b to <128 x half>* +// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2 +// %2 = bitcast <128 x half> %wide.load to <64 x i32> +// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2) +// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2) +// %5 = bitcast <128 x half> %wide.load2 to <64 x i32> +// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5) +// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5) +// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6) +// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1 +// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9) +// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9) +// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10) +// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11) +// %14 = bitcast <32 x i32> %12 to <32 x float> +// %15 = bitcast <32 x i32> %13 to <32 x float> +// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2 +// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7) +// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1 +// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18) +// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18) +// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19) +// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20) +// %23 = bitcast <32 x i32> %21 to <32 x float> +// %24 = bitcast <32 x i32> %22 to <32 x float> +// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2 +// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 x i32> Mask3 +// +// +//===----------------------------------------------------------------------===// +#include "HexagonTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include 
"llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include <algorithm> +#include <utility> + +using namespace llvm; + +namespace llvm { +void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &); +FunctionPass * +createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &); +} // end namespace llvm + +namespace { + +class HexagonGenWideningVecFloatInstr : public FunctionPass { +public: + static char ID; + + HexagonGenWideningVecFloatInstr() : FunctionPass(ID) { + initializeHexagonGenWideningVecFloatInstrPass( + *PassRegistry::getPassRegistry()); + } + + HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM) + : FunctionPass(ID), TM(TM) { + initializeHexagonGenWideningVecFloatInstrPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Hexagon generate widening vector float instructions"; + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + } + +private: + Module *M = nullptr; + const HexagonTargetMachine *TM = nullptr; + const HexagonSubtarget *HST = nullptr; + unsigned HwVLen; + unsigned NumHalfEltsInFullVec; + + struct OPInfo { + Value *OP; + Value *ExtInOP; + unsigned ExtInSize; + }; + + bool visitBlock(BasicBlock *B); + bool processInstruction(Instruction *Inst); + bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info, + OPInfo &OP2Info); + + bool getOperandInfo(Value *V, OPInfo &OPI); + bool isExtendedConstant(Constant *C); + unsigned getElementSizeInBits(Value *V); + Type *getElementTy(unsigned size, IRBuilder<> &IRB); + + Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB, + unsigned NewEltsize, unsigned NumElts); + + std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst); + + Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, + Value *NewOP2, FixedVectorType *ResType, + unsigned NumElts, bool BitCastOp); +}; + +} // end anonymous namespace + +char HexagonGenWideningVecFloatInstr::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonGenWideningVecFloatInstr, "widening-vec-float", + "Hexagon generate " + "widening vector float instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenWideningVecFloatInstr, "widening-vec-float", + "Hexagon generate " + "widening vector float instructions", + false, false) + +bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) { + if (Value *SplatV = C->getSplatValue()) { + if (auto *CFP = dyn_cast<ConstantFP>(SplatV)) { + bool Ignored; + APFloat APF = CFP->getValueAPF(); + APFloat::opStatus sts = APF.convert( + APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); + if (sts == APFloat::opStatus::opOK || sts == APFloat::opStatus::opInexact) + return true; + } + return false; + } + unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); + for (unsigned i = 0, e = NumElts; i != e; ++i) { + if (auto *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(i))) { + bool Ignored; + APFloat APF = CFP->getValueAPF(); + APFloat::opStatus sts = APF.convert( + APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); + if (sts != APFloat::opStatus::opOK && sts != APFloat::opStatus::opInexact) + return false; + continue; + } + return false; + } + return true; +} + +unsigned 
HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) { + Type *ValTy = V->getType(); + Type *EltTy = ValTy; + if (dyn_cast<Constant>(V)) { + unsigned EltSize = + cast<VectorType>(EltTy)->getElementType()->getPrimitiveSizeInBits(); + unsigned ReducedSize = EltSize / 2; + + return ReducedSize; + } + + if (ValTy->isVectorTy()) + EltTy = cast<VectorType>(ValTy)->getElementType(); + return EltTy->getPrimitiveSizeInBits(); +} + +bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) { + using namespace PatternMatch; + OPI.OP = V; + Value *ExtV = nullptr; + Constant *C = nullptr; + + if (match(V, (m_FPExt(m_Value(ExtV)))) || + match(V, + m_Shuffle(m_InsertElt(m_Poison(), m_FPExt(m_Value(ExtV)), m_Zero()), + m_Poison(), m_ZeroMask()))) { + + if (auto *ExtVType = dyn_cast<VectorType>(ExtV->getType())) { + // Matches the first branch. + if (ExtVType->getElementType()->isBFloatTy()) + // do not confuse bf16 with ieee-fp16. + return false; + } else { + // Matches the second branch (insert element branch) + if (ExtV->getType()->isBFloatTy()) + return false; + } + + OPI.ExtInOP = ExtV; + OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP); + return true; + } + + if (match(V, m_Constant(C))) { + if (!isExtendedConstant(C)) + return false; + OPI.ExtInOP = C; + OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP); + return true; + } + + return false; +} + +Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size, + IRBuilder<> &IRB) { + switch (size) { + case 16: + return IRB.getHalfTy(); + case 32: + return IRB.getFloatTy(); + default: + llvm_unreachable("Unhandled Element size"); + } +} + +Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp( + OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) { + Value *V = OPI.ExtInOP; + unsigned EltSize = getElementSizeInBits(OPI.ExtInOP); + assert(NewExtSize >= EltSize); + Type *EltType = getElementTy(NewExtSize, IRB); + auto *NewOpTy = FixedVectorType::get(EltType, NumElts); + + if (auto *C = dyn_cast<Constant>(V)) + return IRB.CreateFPTrunc(C, NewOpTy); + + if (V->getType()->isVectorTy()) + if (NewExtSize == EltSize) + return V; + + return nullptr; +} + +std::pair<Value *, Value *> +HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) { + Type *InstTy = Inst->getType(); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + IRBuilder<> IRB(Inst); + Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B; + Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B; + Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi); + Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo); + if (NumElts == 128) { + auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64); + OP = IRB.CreateBitCast(OP, InType); + } + Value *OP1Hi = IRB.CreateCall(ExtFHi, {OP}); + Value *OP1Lo = IRB.CreateCall(ExtFLo, {OP}); + return std::pair<Value *, Value *>(OP1Hi, OP1Lo); +} + +Value *HexagonGenWideningVecFloatInstr::createIntrinsic( + Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2, + FixedVectorType *ResType, unsigned NumElts, bool BitCastOp) { + + IRBuilder<> IRB(Inst); + Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId); + Function *ConvF = Intrinsic::getOrInsertDeclaration( + M, Intrinsic::hexagon_V6_vconv_sf_qf32_128B); + auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 32); + auto *RType = FixedVectorType::get(IRB.getFloatTy(), 32); + + // Make sure inputs to vmpy instrinsic are full vectors + if (NumElts == NumHalfEltsInFullVec / 2) { + 
SmallVector<Constant *, 16> ConcatMask1; + for (unsigned i = 0; i < NumHalfEltsInFullVec; ++i) + ConcatMask1.push_back(IRB.getInt32(i)); + NewOP1 = + IRB.CreateShuffleVector(NewOP1, PoisonValue::get(NewOP1->getType()), + ConstantVector::get(ConcatMask1)); + NewOP2 = + IRB.CreateShuffleVector(NewOP2, PoisonValue::get(NewOP2->getType()), + ConstantVector::get(ConcatMask1)); + } + + if (BitCastOp) { + NewOP1 = IRB.CreateBitCast(NewOP1, InType); + NewOP2 = IRB.CreateBitCast(NewOP2, InType); + } + + Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2}); + // Interleave the output elements to ensure correct order in Hi and Lo vectors + // Shuffled Mask: [0, 32, 1, 33, ..., 31, 63] + // Hi: [0, 1, ..., 31] and Lo: [32, 33, ..., 63] + SmallVector<Constant *, 16> Mask; + unsigned HalfVecPoint = NumHalfEltsInFullVec / 2; + for (unsigned i = 0; i < HalfVecPoint; ++i) { + Mask.push_back(IRB.getInt32(i)); + Mask.push_back(IRB.getInt32(HalfVecPoint + i)); + } + NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(NewIn->getType()), + ConstantVector::get(Mask)); + + std::pair<Value *, Value *> SplitOP = opSplit(NewIn, Inst); + Value *ConvHi = IRB.CreateCall(ConvF, {SplitOP.first}); + ConvHi = IRB.CreateBitCast(ConvHi, RType); + + if (ResType->getNumElements() == NumHalfEltsInFullVec / 2) { + return ConvHi; + } + + Value *ConvLo = IRB.CreateCall(ConvF, {SplitOP.second}); + ConvLo = IRB.CreateBitCast(ConvLo, RType); + + SmallVector<Constant *, 16> ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i)); + // Concat Hi and Lo. + NewIn = + IRB.CreateShuffleVector(ConvLo, ConvHi, ConstantVector::get(ShuffleMask)); + return NewIn; +} + +bool HexagonGenWideningVecFloatInstr::replaceWithIntrinsic(Instruction *Inst, + OPInfo &OP1Info, + OPInfo &OP2Info) { + Type *InstTy = Inst->getType(); + Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType(); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + [[maybe_unused]] unsigned InstEltSize = EltTy->getPrimitiveSizeInBits(); + + unsigned MaxEltSize = OP1Info.ExtInSize; + unsigned NewOpEltSize = MaxEltSize; + unsigned NewResEltSize = 2 * MaxEltSize; + + unsigned ResVLen = NewResEltSize * NumElts; + if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0)) + return false; + + Intrinsic::ID IntId = Intrinsic::hexagon_V6_vmpy_qf32_hf_128B; + IRBuilder<> IRB(Inst); + Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts); + Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts); + + if (NewOP1 == nullptr || NewOP2 == nullptr) + return false; + + if (ResVLen > 2 * HwVLen) { + // The code written in this if block generates the widening code when + // vector-width is 128: + // + // Step 1: Bitcast <128 x half> type to <64 x i32> + // %wide.load = load <128 x half>, <128 x half>* %0 is bitcasted to, + // bitcast <128 x half> %wide.load to <64 x i32> + // + // Step 2: Generate Hi and Lo vectors + // call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %4) + // call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %4) + // + // Perform above 2 steps for both the operands of fmul instruction + // + // Step 3: Generate vmpy_qf32_hf multiply instruction to multiply two Hi + // vectors from both operands. 
+ // call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%5, %8) + // + // Step 4: Convert the resultant 'qf32' output to 'sf' format + // %11 = shufflevector <64 x i32> %10, <64 x i32> poison, <64 x i32> Mask1 + // %12 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %11) + // %13 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %11) + // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %12) + // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %13) + // + // Repeat steps 3 and 4 for mutiplication and conversion of Lo vectors. + // Finally merge the output values in correct sequence using shuffle + // vectors. + + assert(ResVLen == 4 * HwVLen); + // Split the operands + unsigned HalfElts = NumElts / 2; + std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst); + std::pair<Value *, Value *> SplitOP2 = opSplit(NewOP2, Inst); + auto *castResType = FixedVectorType::get(IRB.getInt32Ty(), HalfElts); + Value *NewInHi = + createIntrinsic(IntId, Inst, SplitOP1.first, SplitOP2.first, + castResType, HalfElts, false); + Value *NewInLo = + createIntrinsic(IntId, Inst, SplitOP1.second, SplitOP2.second, + castResType, HalfElts, false); + assert(InstEltSize == NewResEltSize); + SmallVector<Constant *, 8> ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i)); + // Concat Hi and Lo. + Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi, + ConstantVector::get(ShuffleMask)); + + Inst->replaceAllUsesWith(NewIn); + return true; + } + + auto *ResType = + FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts); + + // The following widening code can only be generated in cases where + // input vectors are 64xhalf/32xhalf and the results are 64xfloat/32xfloat + // respectively. + if (!(NumElts == NumHalfEltsInFullVec && + ResType->getNumElements() == NumHalfEltsInFullVec) && + !(NumElts == NumHalfEltsInFullVec / 2 && + ResType->getNumElements() == NumHalfEltsInFullVec / 2)) + return false; + Value *NewIn = + createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true); + + Inst->replaceAllUsesWith(NewIn); + return true; +} + +// Process instruction and replace them with widening vector +// intrinsics if possible. +bool HexagonGenWideningVecFloatInstr::processInstruction(Instruction *Inst) { + Type *InstTy = Inst->getType(); + if (!InstTy->isVectorTy() || + cast<FixedVectorType>(InstTy)->getNumElements() > 128) + return false; + unsigned InstLen = InstTy->getPrimitiveSizeInBits(); + if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen) + return false; + if (InstLen < HwVLen) + return false; + + using namespace PatternMatch; + + Value *OP1 = nullptr, *OP2 = nullptr; + OPInfo OP1Info, OP2Info; + + // Handle the case when Inst = fpext(fmul<64xhalf>(op1, op2)). The Inst can + // be replaced with widening multiply. 
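  // For illustration (value names are hypothetical), this covers IR such as:
  //   %m = fmul <64 x half> %a, %b
  //   %e = fpext <64 x half> %m to <64 x float>
  // where the fmul and the fpext together are folded into one widening
  // qf32 multiply of the original half vectors.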
+ if (match(Inst, (m_FPExt((m_FMul(m_Value(OP1), m_Value(OP2))))))) { + OP1Info.ExtInOP = OP1; + OP1Info.ExtInSize = getElementSizeInBits(OP1); + OP2Info.ExtInOP = OP2; + OP2Info.ExtInSize = getElementSizeInBits(OP2); + + if (auto *Op1Vtype = dyn_cast<VectorType>(OP1->getType())) { + if (!Op1Vtype->getElementType()->isHalfTy()) { + return false; + } + } else { + return false; + } + + if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16 && + getElementSizeInBits(Inst) == 32) { + return replaceWithIntrinsic(Inst, OP1Info, OP2Info); + } + } + + if (!match(Inst, (m_FMul(m_Value(OP1), m_Value(OP2))))) + return false; + + if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info)) + return false; + + if (!OP1Info.ExtInOP || !OP2Info.ExtInOP) + return false; + + if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16) { + return replaceWithIntrinsic(Inst, OP1Info, OP2Info); + } + + return false; +} + +bool HexagonGenWideningVecFloatInstr::visitBlock(BasicBlock *B) { + bool Changed = false; + for (auto &I : *B) + Changed |= processInstruction(&I); + return Changed; +} + +bool HexagonGenWideningVecFloatInstr::runOnFunction(Function &F) { + M = F.getParent(); + HST = TM->getSubtargetImpl(F); + + // Return if useHVX128BOps is not set. It can be enabled for 64B mode + // but wil require some changes. For example, bitcast for intrinsics + // assumes 128B mode. + if (skipFunction(F) || !HST->useHVX128BOps()) + return false; + + unsigned VecLength = HST->getVectorLength(); // Vector Length in Bytes + HwVLen = HST->getVectorLength() * 8; // Vector Length in bits + NumHalfEltsInFullVec = + VecLength / + 2; // Number of half (2B) elements that fit into a full HVX vector + bool Changed = false; + for (auto &B : F) + Changed |= visitBlock(&B); + + return Changed; +} + +FunctionPass * +llvm::createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &TM) { + return new HexagonGenWideningVecFloatInstr(&TM); +} diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp new file mode 100644 index 0000000..297410b --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp @@ -0,0 +1,1181 @@ +//===--------------------- HexagonGenWideningVecInstr.cpp -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replace widening vector operations with hexagon intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> +#include <utility> + +using namespace llvm; + +// A command line argument to enable the generation of widening instructions +// for short-vectors. 
+static cl::opt<bool> WidenShortVector( + "hexagon-widen-short-vector", + cl::desc("Generate widening instructions for short vectors."), cl::Hidden); + +namespace llvm { +void initializeHexagonGenWideningVecInstrPass(PassRegistry &); +FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &); +} // end namespace llvm + +namespace { + +class HexagonGenWideningVecInstr : public FunctionPass { +public: + static char ID; + + HexagonGenWideningVecInstr() : FunctionPass(ID) { + initializeHexagonGenWideningVecInstrPass(*PassRegistry::getPassRegistry()); + } + + HexagonGenWideningVecInstr(const HexagonTargetMachine *TM) + : FunctionPass(ID), TM(TM) { + initializeHexagonGenWideningVecInstrPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Hexagon generate widening vector instructions"; + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + } + +private: + Module *M = nullptr; + const HexagonTargetMachine *TM = nullptr; + const HexagonSubtarget *HST = nullptr; + unsigned HwVLen; + enum OPKind { OP_None = 0, OP_Add, OP_Sub, OP_Mul, OP_Shl }; + + struct OPInfo { + Value *OP = nullptr; + Value *ExtInOP = nullptr; + bool IsZExt = false; + unsigned ExtInSize = 0; + bool IsScalar = false; + }; + + bool visitBlock(BasicBlock *B); + bool processInstruction(Instruction *Inst); + bool replaceWithIntrinsic(Instruction *Inst, OPKind OPK, OPInfo &OP1Info, + OPInfo &OP2Info); + bool getOperandInfo(Value *V, OPInfo &OPI); + bool isExtendedConstant(Constant *C, bool IsSigned); + unsigned getElementSizeInBits(Value *V, bool IsZExt); + Type *getElementTy(unsigned size, IRBuilder<> &IRB); + + Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB, + unsigned NewEltsize, unsigned NumElts); + + Intrinsic::ID getIntrinsic(OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, + unsigned NewOpEltSize, unsigned NewResEltSize, + bool IsConstScalar, int ConstOpNum); + + std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst, + Type *NewOpType); + + Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, + Value *NewOP2, Type *ResType, unsigned NumElts, + bool Interleave); + bool processInstructionForVMPA(Instruction *Inst); + bool getVmpaOperandInfo(Value *V, OPInfo &OPI); + void reorderVmpaOperands(OPInfo *OPI); + bool replaceWithVmpaIntrinsic(Instruction *Inst, OPInfo *OPI); + bool genSaturatingInst(Instruction *Inst); + bool getMinMax(Constant *MinC, Constant *MaxC, std::pair<int, int> &MinMax); + bool isSaturatingVAsr(Instruction *Inst, Value *S, int MinV, int MaxV, + bool &IsResSigned); + Value *extendShiftByVal(Value *ShiftByVal, IRBuilder<> &IRB); + Intrinsic::ID getVAsrIntrinsic(bool IsInSigned, bool IsResSigned); + Value *createVAsrIntrinsic(Instruction *Inst, Value *VecOP, Value *ShiftByVal, + bool IsResSigned); + bool genVAvg(Instruction *Inst); + bool checkConstantVector(Value *OP, int64_t &SplatVal, bool IsOPZExt); + void updateMPYConst(Intrinsic::ID IntId, int64_t SplatVal, bool IsOPZExt, + Value *&OP, IRBuilder<> &IRB); + void packConstant(Intrinsic::ID IntId, int64_t SplatVal, Value *&OP, + IRBuilder<> &IRB); +}; + +} // end anonymous namespace + +char HexagonGenWideningVecInstr::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonGenWideningVecInstr, "widening-vec", + "Hexagon generate " + "widening vector instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
+INITIALIZE_PASS_END(HexagonGenWideningVecInstr, "widening-vec", + "Hexagon generate " + "widening vector instructions", + false, false) + +static bool hasNegativeValues(Constant *C) { + if (Value *SplatV = C->getSplatValue()) { + auto *CI = dyn_cast<ConstantInt>(SplatV); + assert(CI); + return CI->getValue().isNegative(); + } + unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); + for (unsigned i = 0, e = NumElts; i != e; ++i) { + auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i)); + assert(CI); + if (CI->getValue().isNegative()) + return true; + continue; + } + return false; +} + +bool HexagonGenWideningVecInstr::getOperandInfo(Value *V, OPInfo &OPI) { + using namespace PatternMatch; + OPI.OP = V; + Value *ExtV = nullptr; + Constant *C = nullptr; + + bool Match = false; + if ((Match = (match(V, (m_ZExt(m_Value(ExtV)))) || + match(V, m_Shuffle(m_InsertElt(m_Poison(), + m_ZExt(m_Value(ExtV)), m_Zero()), + m_Poison(), m_ZeroMask()))))) { + OPI.ExtInOP = ExtV; + OPI.IsZExt = true; + } + + if (!Match && + (Match = (match(V, (m_SExt(m_Value(ExtV)))) || + match(V, m_Shuffle(m_InsertElt(m_Poison(), + m_SExt(m_Value(ExtV)), m_Zero()), + m_Poison(), m_ZeroMask()))))) { + OPI.ExtInOP = ExtV; + OPI.IsZExt = false; + } + if (!Match && + (Match = + (match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()), + m_Poison(), m_ZeroMask()))))) { + if (match(ExtV, m_And(m_Value(), m_SpecificInt(255)))) { + OPI.ExtInOP = ExtV; + OPI.IsZExt = true; + OPI.ExtInSize = 8; + return true; + } + if (match(ExtV, m_And(m_Value(), m_SpecificInt(65535)))) { + OPI.ExtInOP = ExtV; + OPI.IsZExt = true; + OPI.ExtInSize = 16; + return true; + } + return false; + } + + if (!Match && (Match = match(V, m_Constant(C)))) { + if (!isExtendedConstant(C, false) && !isExtendedConstant(C, true)) + return false; + OPI.ExtInOP = C; + OPI.IsZExt = !hasNegativeValues(C); + } + + if (!Match) + return false; + + // If the operand is extended, find the element size of its input. + if (OPI.ExtInOP) + OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt); + return true; +} + +bool HexagonGenWideningVecInstr::isExtendedConstant(Constant *C, + bool IsSigned) { + Type *CTy = cast<FixedVectorType>(C->getType())->getElementType(); + unsigned EltSize = CTy->getPrimitiveSizeInBits(); + unsigned HalfSize = EltSize / 2; + if (Value *SplatV = C->getSplatValue()) { + if (auto *CI = dyn_cast<ConstantInt>(SplatV)) + return IsSigned ? 
isIntN(HalfSize, CI->getSExtValue()) + : isUIntN(HalfSize, CI->getZExtValue()); + return false; + } + unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); + for (unsigned i = 0, e = NumElts; i != e; ++i) { + if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) { + if ((IsSigned && !isIntN(HalfSize, CI->getSExtValue())) || + (!IsSigned && !isUIntN(HalfSize, CI->getZExtValue()))) + return false; + continue; + } + return false; + } + return true; +} + +unsigned HexagonGenWideningVecInstr::getElementSizeInBits(Value *V, + bool IsZExt = false) { + using namespace PatternMatch; + Type *ValTy = V->getType(); + Type *EltTy = ValTy; + if (auto *C = dyn_cast<Constant>(V)) { + unsigned NumElts = cast<FixedVectorType>(EltTy)->getNumElements(); + unsigned EltSize = cast<FixedVectorType>(EltTy) + ->getElementType() + ->getPrimitiveSizeInBits() + .getKnownMinValue(); + unsigned ReducedSize = EltSize / 2; + + while (ReducedSize >= 8) { + for (unsigned i = 0, e = NumElts; i != e; ++i) { + if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) { + if (IsZExt) { + if (!isUIntN(ReducedSize, CI->getZExtValue())) + return EltSize; + } else if (!isIntN(ReducedSize, CI->getSExtValue())) + return EltSize; + } + } + EltSize = ReducedSize; + ReducedSize = ReducedSize / 2; + } + return EltSize; + } + + if (ValTy->isVectorTy()) + EltTy = cast<FixedVectorType>(ValTy)->getElementType(); + return EltTy->getPrimitiveSizeInBits(); +} + +Value *HexagonGenWideningVecInstr::adjustExtensionForOp(OPInfo &OPI, + IRBuilder<> &IRB, + unsigned NewExtSize, + unsigned NumElts) { + Value *V = OPI.ExtInOP; + bool IsZExt = OPI.IsZExt; + unsigned EltSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt); + Type *EltType = getElementTy(NewExtSize, IRB); + auto *NewOpTy = FixedVectorType::get(EltType, NumElts); + + if (dyn_cast<Constant>(V)) + return IRB.CreateTrunc(V, NewOpTy); + + if (V->getType()->isVectorTy()) { + if (NewExtSize == EltSize) + return V; + assert(NewExtSize == 16); + auto *NewOpTy = FixedVectorType::get(IRB.getInt16Ty(), NumElts); + return (IsZExt) ? IRB.CreateZExt(V, NewOpTy) : IRB.CreateSExt(V, NewOpTy); + } + + // The operand must correspond to a shuffle vector which is used to construct + // a vector out of a scalar. Since the scalar value (V) is extended, + // replace it with a new shuffle vector with the smaller element size. + [[maybe_unused]] auto *I = dyn_cast<Instruction>(OPI.OP); + assert(I && I->getOpcode() == Instruction::ShuffleVector); + + if (NewExtSize > EltSize) + V = (IsZExt) ? IRB.CreateZExt(V, EltType) : IRB.CreateSExt(V, EltType); + else if (NewExtSize < EltSize) + V = IRB.CreateTrunc(V, EltType); + + Value *IE = + IRB.CreateInsertElement(PoisonValue::get(NewOpTy), V, IRB.getInt32(0)); + + SmallVector<Constant *, 8> ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(0)); + + return IRB.CreateShuffleVector(IE, PoisonValue::get(NewOpTy), + ConstantVector::get(ShuffleMask)); +} + +Intrinsic::ID HexagonGenWideningVecInstr::getIntrinsic( + OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, unsigned InEltSize, + unsigned ResEltSize, bool IsConstScalar, int ConstOpNum) { + // Since the operands have been extended, the ResEltSize must be 16 or more. + switch (OPK) { + case OP_Add: + // Both operands should be either zero extended or sign extended. 
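  // Per lane, the unsigned 8 -> 16 widening add below amounts to
  //   uint16_t r = uint16_t(a) + uint16_t(b);
  // which hexagon_vadd_uu provides without materializing the zero-extended
  // input vectors (lane names a/b are illustrative).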
+ assert(IsOP1ZExt == IsOP2ZExt); + if (InEltSize == 8 && ResEltSize == 16) { + // Operands must be zero extended as we don't have a widening vector + // 'add' that can take signed exteded values. + assert(IsOP1ZExt && "Operands must be zero-extended"); + return Intrinsic::hexagon_vadd_uu; + } + if (InEltSize == 16 && ResEltSize == 32) + return (IsOP1ZExt) ? Intrinsic::hexagon_vadd_uu + : Intrinsic::hexagon_vadd_ss; + + llvm_unreachable("Incorrect input and output operand sizes"); + + case OP_Sub: + // Both operands should be either zero extended or sign extended. + assert(IsOP1ZExt == IsOP2ZExt); + if (InEltSize == 8 && ResEltSize == 16) { + // Operands must be zero extended as we don't have a widening vector + // 'sub' that can take signed exteded values. + assert(IsOP1ZExt && "Operands must be zero-extended"); + return Intrinsic::hexagon_vsub_uu; + } + if (InEltSize == 16 && ResEltSize == 32) + return (IsOP1ZExt) ? Intrinsic::hexagon_vsub_uu + : Intrinsic::hexagon_vsub_ss; + + llvm_unreachable("Incorrect input and output operand sizes"); + + case OP_Mul: + assert(ResEltSize == 2 * InEltSize); + // Enter inside 'if' block when one of the operand is constant vector + if (IsConstScalar) { + // When inputs are of 8bit type and output is 16bit type, enter 'if' block + if (InEltSize == 8 && ResEltSize == 16) { + // Enter the 'if' block, when 2nd operand of the mul instruction is + // constant vector, otherwise enter 'else' block + if (ConstOpNum == 2 && IsOP1ZExt) { + // If the value inside the constant vector is zero-extended, then + // return hexagon_vmpy_ub_ub, else return hexagon_vmpy_ub_b + return (IsOP2ZExt) ? Intrinsic::hexagon_vmpy_ub_ub + : Intrinsic::hexagon_vmpy_ub_b; + } else if (ConstOpNum == 1 && IsOP2ZExt) { + return (IsOP1ZExt) ? Intrinsic::hexagon_vmpy_ub_ub + : Intrinsic::hexagon_vmpy_ub_b; + } + } + // When inputs are of 16bit type and output is 32bit type, + // enter 'if' block + if (InEltSize == 16 && ResEltSize == 32) { + if (IsOP1ZExt && IsOP2ZExt) { + // If the value inside the constant vector and other operand is + // zero-extended, then return hexagon_vmpy_uh_uh + return Intrinsic::hexagon_vmpy_uh_uh; + } else if (!IsOP1ZExt && !IsOP2ZExt) { + // If the value inside the constant vector and other operand is + // sign-extended, then return hexagon_vmpy_h_h + return Intrinsic::hexagon_vmpy_h_h; + } + } + } + if (IsOP1ZExt) + return IsOP2ZExt ? Intrinsic::hexagon_vmpy_uu + : Intrinsic::hexagon_vmpy_us; + else + return IsOP2ZExt ? Intrinsic::hexagon_vmpy_su + : Intrinsic::hexagon_vmpy_ss; + default: + llvm_unreachable("Instruction not handled!"); + } +} + +Type *HexagonGenWideningVecInstr::getElementTy(unsigned size, + IRBuilder<> &IRB) { + switch (size) { + case 8: + return IRB.getInt8Ty(); + case 16: + return IRB.getInt16Ty(); + case 32: + return IRB.getInt32Ty(); + default: + llvm_unreachable("Unhandled Element size"); + } +} + +Value *HexagonGenWideningVecInstr::createIntrinsic( + Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2, + Type *ResType, unsigned NumElts, bool Interleave = true) { + IRBuilder<> IRB(Inst); + Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId, ResType); + Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2}); + if (Interleave) { + // Interleave elements in the output vector. 
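    // The loop below builds the mask <0, H, 1, H+1, ...> with H = NumElts/2;
    // e.g. for NumElts == 8 the mask is <0, 4, 1, 5, 2, 6, 3, 7>.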
+ SmallVector<Constant *, 16> ShuffleMask; + unsigned HalfElts = NumElts / 2; + for (unsigned i = 0; i < HalfElts; ++i) { + ShuffleMask.push_back(IRB.getInt32(i)); + ShuffleMask.push_back(IRB.getInt32(HalfElts + i)); + } + NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType), + ConstantVector::get(ShuffleMask)); + } + return NewIn; +} + +std::pair<Value *, Value *> +HexagonGenWideningVecInstr::opSplit(Value *OP, Instruction *Inst, + Type *NewOpType) { + Type *InstTy = Inst->getType(); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + IRBuilder<> IRB(Inst); + if (InstTy->getPrimitiveSizeInBits() < 2 * HwVLen) { + // The only time we need to split an OP even though it is not a + // vector-pair is while generating vasr instruction for the short vector. + // Since hi/lo intrinsics can't be used here as they expect the operands to + // be of 64xi32 type, the shuffle_vector pair with the appropriate masks is + // used instead. + assert(NumElts % 2 == 0 && "Unexpected Vector Type!!"); + unsigned HalfElts = NumElts / 2; + SmallVector<Constant *, 8> HiM; + SmallVector<Constant *, 8> LoM; + for (unsigned i = 0; i < HalfElts; ++i) + LoM.push_back(IRB.getInt32(i)); + for (unsigned i = 0; i < HalfElts; ++i) + HiM.push_back(IRB.getInt32(HalfElts + i)); + + Value *Hi = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()), + ConstantVector::get(HiM)); + Value *Lo = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()), + ConstantVector::get(LoM)); + return std::pair<Value *, Value *>(Hi, Lo); + } + + Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B; + Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B; + Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi); + Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo); + auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64); + OP = IRB.CreateBitCast(OP, InType); + Value *Hi = IRB.CreateCall(ExtFHi, {OP}); // 32xi32 + Value *Lo = IRB.CreateCall(ExtFLo, {OP}); + Hi = IRB.CreateBitCast(Hi, NewOpType); + Lo = IRB.CreateBitCast(Lo, NewOpType); + return std::pair<Value *, Value *>(Hi, Lo); +} + +bool HexagonGenWideningVecInstr::checkConstantVector(Value *OP, + int64_t &SplatVal, + bool IsOPZExt) { + if (auto *C1 = dyn_cast<Constant>(OP)) { + if (Value *SplatV = C1->getSplatValue()) { + auto *CI = dyn_cast<ConstantInt>(SplatV); + if (IsOPZExt) { + SplatVal = CI->getZExtValue(); + } else { + SplatVal = CI->getSExtValue(); + } + return true; + } + } + return false; +} + +void HexagonGenWideningVecInstr::updateMPYConst(Intrinsic::ID IntId, + int64_t SplatVal, bool IsOPZExt, + Value *&OP, IRBuilder<> &IRB) { + if ((IntId == Intrinsic::hexagon_vmpy_uu || + IntId == Intrinsic::hexagon_vmpy_us || + IntId == Intrinsic::hexagon_vmpy_su || + IntId == Intrinsic::hexagon_vmpy_ss) && + OP->getType()->isVectorTy()) { + // Create a vector with all elements equal to SplatVal + Type *VecTy = OP->getType(); + Value *splatVector = + ConstantInt::get(VecTy, static_cast<uint32_t>(SplatVal)); + OP = IsOPZExt ? 
IRB.CreateZExt(splatVector, VecTy) + : IRB.CreateSExt(splatVector, VecTy); + } else { + packConstant(IntId, SplatVal, OP, IRB); + } +} + +void HexagonGenWideningVecInstr::packConstant(Intrinsic::ID IntId, + int64_t SplatVal, Value *&OP, + IRBuilder<> &IRB) { + uint32_t Val32 = static_cast<uint32_t>(SplatVal); + if (IntId == Intrinsic::hexagon_vmpy_ub_ub) { + assert(SplatVal >= 0 && SplatVal <= UINT8_MAX); + uint32_t packed = (Val32 << 24) | (Val32 << 16) | (Val32 << 8) | Val32; + OP = IRB.getInt32(packed); + } else if (IntId == Intrinsic::hexagon_vmpy_ub_b) { + assert(SplatVal >= INT8_MIN && SplatVal <= INT8_MAX); + uint32_t packed = (Val32 << 24) | ((Val32 << 16) & ((1 << 24) - 1)) | + ((Val32 << 8) & ((1 << 16) - 1)) | + (Val32 & ((1 << 8) - 1)); + OP = IRB.getInt32(packed); + } else if (IntId == Intrinsic::hexagon_vmpy_uh_uh) { + assert(SplatVal >= 0 && SplatVal <= UINT16_MAX); + uint32_t packed = (Val32 << 16) | Val32; + OP = IRB.getInt32(packed); + } else if (IntId == Intrinsic::hexagon_vmpy_h_h) { + assert(SplatVal >= INT16_MIN && SplatVal <= INT16_MAX); + uint32_t packed = (Val32 << 16) | (Val32 & ((1 << 16) - 1)); + OP = IRB.getInt32(packed); + } +} + +bool HexagonGenWideningVecInstr::replaceWithIntrinsic(Instruction *Inst, + OPKind OPK, + OPInfo &OP1Info, + OPInfo &OP2Info) { + Type *InstTy = Inst->getType(); + Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType(); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + unsigned InstEltSize = EltTy->getPrimitiveSizeInBits(); + + bool IsOP1ZExt = OP1Info.IsZExt; + bool IsOP2ZExt = OP2Info.IsZExt; + + // The resulting values of 'add' and 'sub' are always sign-extended. + bool IsResZExt = (OPK == OP_Mul || OPK == OP_Shl) + ? (OP1Info.IsZExt && OP2Info.IsZExt) + : false; + + unsigned MaxEltSize = std::max(OP1Info.ExtInSize, OP2Info.ExtInSize); + unsigned NewOpEltSize = MaxEltSize; + unsigned NewResEltSize = 2 * MaxEltSize; + + // For Add and Sub, both the operands should be either zero extended + // or sign extended. In case of a mismatch, they are extended to the + // next size (ex: 8 bits -> 16 bits) so that the sign-extended vadd/vsub + // instructions can be used. Also, we don't support 8-bits signed vadd/vsub + // instructions. They are extended to 16-bits and then signed 16-bits + // non-widening vadd/vsub is used to perform the operation. + if (OPK != OP_Mul && OPK != OP_Shl && + (IsOP1ZExt != IsOP2ZExt || (!IsOP1ZExt && NewOpEltSize == 8))) + NewOpEltSize = 2 * NewOpEltSize; + + unsigned ResVLen = NewResEltSize * NumElts; + if (ResVLen < HwVLen && !WidenShortVector) + return false; + if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0)) + return false; + + IRBuilder<> IRB(Inst); + Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts); + Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts); + + if (NewOpEltSize == NewResEltSize) { + assert(OPK != OP_Mul && OPK != OP_Shl); + // Instead of intrinsics, use vector add/sub. 
+ Value *NewIn = IRB.CreateBinOp(cast<BinaryOperator>(Inst)->getOpcode(), + NewOP1, NewOP2); + if (InstEltSize > NewResEltSize) + NewIn = IRB.CreateSExt(NewIn, InstTy); + Inst->replaceAllUsesWith(NewIn); + return true; + } + + bool IsConstScalar = false; + int64_t SplatVal = 0; + int ConstOpNum = 1; + if (OPK == OP_Mul || OPK == OP_Shl) { + IsConstScalar = checkConstantVector(NewOP1, SplatVal, IsOP1ZExt); + if (!IsConstScalar) { + IsConstScalar = checkConstantVector(NewOP2, SplatVal, IsOP2ZExt); + ConstOpNum = 2; + } + } + + if (IsConstScalar && OPK == OP_Shl) { + if (((NewOpEltSize == 8) && (SplatVal > 0) && (SplatVal < 8)) || + ((NewOpEltSize == 16) && (SplatVal > 0) && (SplatVal < 16))) { + SplatVal = 1LL << SplatVal; + OPK = OP_Mul; + } else { + return false; + } + } else if (!IsConstScalar && OPK == OP_Shl) { + return false; + } + + Intrinsic::ID IntId = getIntrinsic(OPK, IsOP1ZExt, IsOP2ZExt, NewOpEltSize, + NewResEltSize, IsConstScalar, ConstOpNum); + + if (IsConstScalar) { + updateMPYConst(IntId, SplatVal, IsOP2ZExt, NewOP2, IRB); + } + + // Split the node if it needs more than a vector pair for the result. + if (ResVLen > 2 * HwVLen) { + assert(ResVLen == 4 * HwVLen); + // Split the operands + unsigned HalfElts = NumElts / 2; + auto *NewOpType = + FixedVectorType::get(getElementTy(NewOpEltSize, IRB), HalfElts); + auto *ResType = + FixedVectorType::get(getElementTy(NewResEltSize, IRB), HalfElts); + std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst, NewOpType); + std::pair<Value *, Value *> SplitOP2; + if (IsConstScalar && (IntId == Intrinsic::hexagon_vmpy_h_h || + IntId == Intrinsic::hexagon_vmpy_uh_uh)) { + SplitOP2 = std::pair<Value *, Value *>(NewOP2, NewOP2); + } else { + SplitOP2 = opSplit(NewOP2, Inst, NewOpType); + } + Value *NewInHi = createIntrinsic(IntId, Inst, SplitOP1.first, + SplitOP2.first, ResType, HalfElts, true); + Value *NewInLo = createIntrinsic(IntId, Inst, SplitOP1.second, + SplitOP2.second, ResType, HalfElts, true); + assert(InstEltSize == NewResEltSize); + SmallVector<Constant *, 8> ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i)); + // Concat Hi and Lo. + Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi, + ConstantVector::get(ShuffleMask)); + + Inst->replaceAllUsesWith(NewIn); + return true; + } + + auto *ResType = + FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts); + Value *NewIn = + createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true); + if (InstEltSize > NewResEltSize) + NewIn = (IsResZExt) ? IRB.CreateZExt(NewIn, InstTy) + : IRB.CreateSExt(NewIn, InstTy); + + Inst->replaceAllUsesWith(NewIn); + + return true; +} + +// Process instruction and replace them with widening vector +// intrinsics if possible. 
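// For illustration (value names are hypothetical), a typical input in 128B
// HVX mode is:
//   %ea = zext <64 x i8> %a to <64 x i16>
//   %eb = zext <64 x i8> %b to <64 x i16>
//   %m  = mul <64 x i16> %ea, %eb
// Both operands are classified as zero-extended i8, so the mul becomes a
// hexagon_vmpy_uu call on the original i8 vectors, followed by the
// interleaving shuffle that restores lane order.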
+bool HexagonGenWideningVecInstr::processInstruction(Instruction *Inst) { + Type *InstTy = Inst->getType(); + if (!InstTy->isVectorTy() || + cast<FixedVectorType>(InstTy)->getNumElements() > 128) + return false; + unsigned InstLen = InstTy->getPrimitiveSizeInBits(); + if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen) + return false; + if (InstLen < HwVLen && !WidenShortVector) + return false; + + using namespace PatternMatch; + + OPKind OPK; + Value *OP1 = nullptr, *OP2 = nullptr; + if (match(Inst, (m_Sub(m_Value(OP1), m_Value(OP2))))) + OPK = OP_Sub; + else if (match(Inst, (m_Add(m_Value(OP1), m_Value(OP2))))) + OPK = OP_Add; + else if (match(Inst, (m_Mul(m_Value(OP1), m_Value(OP2))))) + OPK = OP_Mul; + else if (match(Inst, (m_Shl(m_Value(OP1), m_Value(OP2))))) + OPK = OP_Shl; + else + return false; + + OPInfo OP1Info, OP2Info; + + if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info)) + return false; + + // Proceed only if both input operands are extended. + if (!OP1Info.ExtInOP || !OP2Info.ExtInOP) + return false; + + return replaceWithIntrinsic(Inst, OPK, OP1Info, OP2Info); +} + +bool HexagonGenWideningVecInstr::getVmpaOperandInfo(Value *V, OPInfo &OPI) { + using namespace PatternMatch; + OPI.OP = V; + Value *ExtV, *OP1 = nullptr; + + if (match(V, + m_ZExt(m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()), + m_Poison(), m_ZeroMask()))) || + match(V, + m_Shuffle(m_InsertElt(m_Poison(), m_ZExt(m_Value(ExtV)), m_Zero()), + m_Poison(), m_ZeroMask()))) { + OPI.ExtInOP = ExtV; + OPI.IsZExt = true; + OPI.IsScalar = true; + OPI.ExtInSize = ExtV->getType()->getPrimitiveSizeInBits(); + return true; + } + + ConstantInt *I = nullptr; + if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()), + m_Poison(), m_ZeroMask())))) { + if (match(ExtV, m_And(m_Value(OP1), m_ConstantInt(I)))) { + uint32_t IValue = I->getZExtValue(); + if (IValue <= 255) { + OPI.ExtInOP = ExtV; + OPI.IsZExt = true; + OPI.ExtInSize = 8; + OPI.IsScalar = true; + return true; + } + } + } + + // Match for non-scalar operands + return getOperandInfo(V, OPI); +} + +// Process instruction and replace with the vmpa intrinsic if possible. +bool HexagonGenWideningVecInstr::processInstructionForVMPA(Instruction *Inst) { + using namespace PatternMatch; + Type *InstTy = Inst->getType(); + // TODO: Extend it to handle short vector instructions (< HwVLen). + // vmpa instructions produce a vector register pair. + if (!InstTy->isVectorTy() || InstTy->getPrimitiveSizeInBits() != 2 * HwVLen) + return false; + + Value *OP1 = nullptr, *OP2 = nullptr; + if (!match(Inst, (m_Add(m_Value(OP1), m_Value(OP2))))) + return false; + + Value *OP[4] = {nullptr, nullptr, nullptr, nullptr}; + if (!match(OP1, m_Mul(m_Value(OP[0]), m_Value(OP[1]))) || + !match(OP2, m_Mul(m_Value(OP[2]), m_Value(OP[3])))) + return false; + + OPInfo OP_Info[4]; + for (unsigned i = 0; i < 4; i++) + if (!getVmpaOperandInfo(OP[i], OP_Info[i]) || !OP_Info[i].ExtInOP) + return false; + + return replaceWithVmpaIntrinsic(Inst, OP_Info); +} + +// Reorder operand info in OPI so that the vector operands come before their +// scalar counterparts. +void HexagonGenWideningVecInstr::reorderVmpaOperands(OPInfo *OPI) { + for (unsigned i = 0; i < 2; i++) + if (!OPI[2 * i].ExtInOP->getType()->isVectorTy()) { + OPInfo Temp; + Temp = OPI[2 * i]; + OPI[2 * i] = OPI[2 * i + 1]; + OPI[2 * i + 1] = Temp; + } +} + +// Only handles the case where one input to vmpa has to be a scalar +// and another is a vector. 
It can be easily extended to cover +// other types of vmpa instructions. +bool HexagonGenWideningVecInstr::replaceWithVmpaIntrinsic(Instruction *Inst, + OPInfo *OPI) { + reorderVmpaOperands(OPI); + + // After reordering of the operands in OPI, the odd elements must have + // IsScalar flag set to true. Also, check the even elements for non-scalars. + if (!OPI[1].IsScalar || !OPI[3].IsScalar || OPI[0].IsScalar || + OPI[2].IsScalar) + return false; + + OPInfo SOPI1 = OPI[1]; + OPInfo SOPI2 = OPI[3]; + + // The scalar operand in the vmpa instructions needs to be an int8. + if (SOPI1.ExtInSize != SOPI2.ExtInSize || SOPI1.ExtInSize != 8) + return false; + + Type *InstTy = Inst->getType(); + Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType(); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + unsigned InstEltSize = EltTy->getPrimitiveSizeInBits(); + + unsigned MaxVEltSize = std::max(OPI[0].ExtInSize, OPI[2].ExtInSize); + unsigned NewVOpEltSize = MaxVEltSize; + unsigned NewResEltSize = 2 * MaxVEltSize; + + if (NumElts * NewVOpEltSize < HwVLen) { + // Extend the operand so that we don't end up with an invalid vector size. + NewVOpEltSize = 2 * NewVOpEltSize; + NewResEltSize = 2 * NewResEltSize; + } + + IRBuilder<> IRB(Inst); + + // Construct scalar operand + Value *NewSOP1 = SOPI1.ExtInOP; + Value *NewSOP2 = SOPI2.ExtInOP; + + Type *S1Ty = NewSOP1->getType(); + Type *S2Ty = NewSOP2->getType(); + if (S1Ty->getPrimitiveSizeInBits() < 32) + NewSOP1 = IRB.CreateZExt(NewSOP1, IRB.getInt32Ty()); + if (S2Ty->getPrimitiveSizeInBits() < 32) + NewSOP2 = IRB.CreateZExt(NewSOP2, IRB.getInt32Ty()); + + Value *SHL = IRB.CreateShl(NewSOP1, IRB.getInt32(8)); + Value *OR = IRB.CreateOr(SHL, NewSOP2); + Intrinsic::ID CombineIntID = Intrinsic::hexagon_A2_combine_ll; + Function *ExtF = Intrinsic::getOrInsertDeclaration(M, CombineIntID); + Value *ScalarOP = IRB.CreateCall(ExtF, {OR, OR}); + + // Construct vector operand + Value *NewVOP1 = adjustExtensionForOp(OPI[0], IRB, NewVOpEltSize, NumElts); + Value *NewVOP2 = adjustExtensionForOp(OPI[2], IRB, NewVOpEltSize, NumElts); + + // Combine both vector operands to form the vector-pair for vmpa + Intrinsic::ID VCombineIntID = Intrinsic::hexagon_V6_vcombine_128B; + ExtF = Intrinsic::getOrInsertDeclaration(M, VCombineIntID); + Type *InType = FixedVectorType::get(IRB.getInt32Ty(), 32); + NewVOP1 = IRB.CreateBitCast(NewVOP1, InType); + NewVOP2 = IRB.CreateBitCast(NewVOP2, InType); + Value *VecOP = IRB.CreateCall(ExtF, {NewVOP1, NewVOP2}); + + Intrinsic::ID VmpaIntID = (NewResEltSize == 16) + ? Intrinsic::hexagon_V6_vmpabus_128B + : Intrinsic::hexagon_V6_vmpauhb_128B; + ExtF = Intrinsic::getOrInsertDeclaration(M, VmpaIntID); + auto *ResType = + FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts); + Value *NewIn = IRB.CreateCall(ExtF, {VecOP, ScalarOP}); + NewIn = IRB.CreateBitCast(NewIn, ResType); + + if (InstEltSize > NewResEltSize) + // Extend the output to match the original instruction type. + NewIn = IRB.CreateSExt(NewIn, InstTy); + + // Interleave elements in the output vector. 
+ SmallVector<Constant *, 16> ShuffleMask; + unsigned HalfElts = NumElts / 2; + for (unsigned i = 0; i < HalfElts; ++i) { + ShuffleMask.push_back(IRB.getInt32(i)); + ShuffleMask.push_back(IRB.getInt32(HalfElts + i)); + } + NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType), + ConstantVector::get(ShuffleMask)); + + Inst->replaceAllUsesWith(NewIn); + return true; +} + +bool HexagonGenWideningVecInstr::genSaturatingInst(Instruction *Inst) { + Type *InstTy = Inst->getType(); + assert(InstTy->isVectorTy()); + if (InstTy->getPrimitiveSizeInBits() > HwVLen) + return false; + + using namespace PatternMatch; + CmpPredicate P1, P2; + Value *L1 = nullptr, *T1 = nullptr, *L2 = nullptr, *T2 = nullptr, + *L3 = nullptr; + Constant *RC1 = nullptr, *FC1 = nullptr, *RC2 = nullptr, *FC2 = nullptr, + *RC3 = nullptr; + + // Pattern of interest: ashr -> llvm.smin -> llvm.smax -> trunc + // Match trunc instruction + if (match(Inst, m_Trunc(m_Intrinsic<Intrinsic::smax>(m_Value(L1), + m_Constant(RC1))))) { + // Match llvm.smin instruction + if (match(L1, m_Intrinsic<Intrinsic::smin>(m_Value(L2), m_Constant(RC2)))) { + // Match ashr instruction + if (match(L2, m_AShr(m_Value(L3), m_Constant(RC3)))) { + std::pair<int, int> MinMax; + // Get the min and max values from the operands of smin and smax + if (getMinMax(RC1, RC2, MinMax)) { + bool IsResSigned; + // Validate the saturating vasr pattern + if (isSaturatingVAsr(Inst, L2, MinMax.first, MinMax.second, + IsResSigned)) { + // Get the shift value from the ashr operand + ConstantInt *shift_val = + dyn_cast<ConstantInt>(RC3->getSplatValue()); + if (shift_val) { + Value *NewIn = + createVAsrIntrinsic(Inst, L3, shift_val, IsResSigned); + Inst->replaceAllUsesWith(NewIn); + return true; + } + } + } + } + } + } + + if (!match(Inst, (m_Trunc(m_Select(m_ICmp(P1, m_Value(L1), m_Constant(RC1)), + m_Value(T1), m_Constant(FC1))))) || + (T1 != L1 || FC1 != RC1)) + return false; + + if (!match(L1, m_Select(m_ICmp(P2, m_Value(L2), m_Constant(RC2)), m_Value(T2), + m_Constant(FC2))) || + (T2 != L2 || FC2 != RC2)) + return false; + + if (!((P1 == CmpInst::ICMP_SGT && P2 == CmpInst::ICMP_SLT) || + (P1 == CmpInst::ICMP_SLT && P2 == CmpInst::ICMP_SGT))) + return false; + + std::pair<int, int> MinMax; + if ((P1 == CmpInst::ICMP_SGT) && (P2 == CmpInst::ICMP_SLT)) { + if (!getMinMax(RC1, RC2, MinMax)) + return false; + } else if (!getMinMax(RC2, RC1, MinMax)) + return false; + + Value *S = L2; // Value being saturated + + // Only AShr instructions are handled. + // Also, the second operand to AShr must be a scalar. 
+ Value *OP1 = nullptr, *ShiftByVal = nullptr; + if (!match(S, m_AShr(m_Value(OP1), + m_Shuffle(m_InsertElt(m_Poison(), m_Value(ShiftByVal), + m_Zero()), + m_Poison(), m_ZeroMask())))) + return false; + + bool IsResSigned; + if (!isSaturatingVAsr(Inst, S, MinMax.first, MinMax.second, IsResSigned)) + return false; + + Value *NewIn = createVAsrIntrinsic(Inst, OP1, ShiftByVal, IsResSigned); + Inst->replaceAllUsesWith(NewIn); + return true; +} + +Value *HexagonGenWideningVecInstr::extendShiftByVal(Value *ShiftByVal, + IRBuilder<> &IRB) { + using namespace PatternMatch; + Value *A = nullptr; + if (match(ShiftByVal, m_Trunc(m_Value(A)))) + return A; + return IRB.CreateZExt(ShiftByVal, IRB.getInt32Ty()); +} + +bool HexagonGenWideningVecInstr::getMinMax(Constant *MinC, Constant *MaxC, + std::pair<int, int> &MinMax) { + Value *SplatV; + if (!(SplatV = MinC->getSplatValue()) || !(dyn_cast<ConstantInt>(SplatV))) + return false; + if (!(SplatV = MaxC->getSplatValue()) || !(dyn_cast<ConstantInt>(SplatV))) + return false; + + ConstantInt *MinI = dyn_cast<ConstantInt>(MinC->getSplatValue()); + ConstantInt *MaxI = dyn_cast<ConstantInt>(MaxC->getSplatValue()); + MinMax = std::pair<int, int>(MinI->getSExtValue(), MaxI->getSExtValue()); + return true; +} + +bool HexagonGenWideningVecInstr::isSaturatingVAsr(Instruction *Inst, Value *S, + int MinV, int MaxV, + bool &IsResSigned) { + if (MinV >= MaxV) + return false; + + IsResSigned = true; + Type *InstTy = Inst->getType(); + Type *EltTy = cast<VectorType>(InstTy)->getElementType(); + unsigned TruncSize = EltTy->getPrimitiveSizeInBits(); + + int MaxRange, MinRange; + if (MinV < 0) { // Saturate to a signed value + MaxRange = (1 << (TruncSize - 1)) - 1; + MinRange = -(1 << (TruncSize - 1)); + } else if (MinV == 0) { // Saturate to an unsigned value + MaxRange = (1 << (TruncSize)) - 1; + MinRange = 0; + IsResSigned = false; + } else + return false; + + if (MinV != MinRange || MaxV != MaxRange) + return false; + + auto *SInst = dyn_cast<Instruction>(S); + if (SInst->getOpcode() == Instruction::AShr) { + Type *SInstTy = SInst->getType(); + Type *SEltTy = cast<VectorType>(SInstTy)->getElementType(); + unsigned SInstEltSize = SEltTy->getPrimitiveSizeInBits(); + if (SInstEltSize != 2 * TruncSize || TruncSize > 16) + return false; + } + return true; +} + +Intrinsic::ID HexagonGenWideningVecInstr::getVAsrIntrinsic(bool IsInSigned, + bool IsResSigned) { + if (!IsResSigned) + return (IsInSigned) ? Intrinsic::hexagon_vasrsat_su + : Intrinsic::hexagon_vasrsat_uu; + return Intrinsic::hexagon_vasrsat_ss; +} + +Value *HexagonGenWideningVecInstr::createVAsrIntrinsic(Instruction *Inst, + Value *VecOP, + Value *ShiftByVal, + bool IsResSigned) { + IRBuilder<> IRB(Inst); + Type *ShiftByTy = ShiftByVal->getType(); + if (ShiftByTy->getPrimitiveSizeInBits() < 32) + ShiftByVal = extendShiftByVal(ShiftByVal, IRB); + + Type *InstTy = Inst->getType(); + Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType(); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + unsigned InstEltSize = EltTy->getPrimitiveSizeInBits(); + + // Replace the instruction with saturating vasr intrinsic. + // Since vasr with saturation interleaves elements from both input vectors, + // they must be deinterleaved for output to end up in the right order. 
+ SmallVector<Constant *, 16> ShuffleMask; + unsigned HalfElts = NumElts / 2; + // Even elements + for (unsigned i = 0; i < HalfElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i * 2)); + // Odd elements + for (unsigned i = 0; i < HalfElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i * 2 + 1)); + + VecOP = IRB.CreateShuffleVector(VecOP, PoisonValue::get(VecOP->getType()), + ConstantVector::get(ShuffleMask)); + + auto *InVecOPTy = + FixedVectorType::get(getElementTy(InstEltSize * 2, IRB), HalfElts); + std::pair<Value *, Value *> HiLo = opSplit(VecOP, Inst, InVecOPTy); + Intrinsic::ID IntID = getVAsrIntrinsic(true, IsResSigned); + Function *F = Intrinsic::getOrInsertDeclaration(M, IntID, InVecOPTy); + Value *NewIn = IRB.CreateCall(F, {HiLo.first, HiLo.second, ShiftByVal}); + return IRB.CreateBitCast(NewIn, InstTy); +} + +// Generate vavg instruction. +bool HexagonGenWideningVecInstr::genVAvg(Instruction *Inst) { + using namespace PatternMatch; + Type *InstTy = Inst->getType(); + assert(InstTy->isVectorTy()); + + bool Match = false; + Value *OP1 = nullptr, *OP2 = nullptr; + bool IsSigned; + if ((Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_ZExt(m_Value(OP1)), + m_ZExt(m_Value(OP2))), + m_SpecificInt(1))))))) + IsSigned = false; + if (!Match && + (Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_SExt(m_Value(OP1)), + m_SExt(m_Value(OP2))), + m_SpecificInt(1))))) || + match(Inst, m_LShr(m_Add(m_Value(OP1), m_Value(OP2)), + m_SpecificInt(1))))) + IsSigned = true; + + if (!Match) + return false; + + unsigned OP1EltSize = getElementSizeInBits(OP1); + unsigned OP2EltSize = getElementSizeInBits(OP2); + unsigned NewEltSize = std::max(OP1EltSize, OP2EltSize); + + Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType(); + unsigned InstEltSize = EltTy->getPrimitiveSizeInBits(); + unsigned InstLen = InstTy->getPrimitiveSizeInBits(); + + // Only vectors that are either smaller, same or twice of the hardware + // vector length are allowed. + if (InstEltSize < NewEltSize || (InstLen > 2 * HwVLen)) + return false; + + if ((InstLen > HwVLen) && (InstLen % HwVLen != 0)) + return false; + + IRBuilder<> IRB(Inst); + unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements(); + auto *AvgInstTy = + FixedVectorType::get(getElementTy(NewEltSize, IRB), NumElts); + if (OP1EltSize < NewEltSize) + OP1 = (IsSigned) ? IRB.CreateSExt(OP1, AvgInstTy) + : IRB.CreateZExt(OP1, AvgInstTy); + if (OP2EltSize < NewEltSize) + OP2 = (IsSigned) ? IRB.CreateSExt(OP2, AvgInstTy) + : IRB.CreateZExt(OP2, AvgInstTy); + + Intrinsic::ID AvgIntID = + (IsSigned) ? Intrinsic::hexagon_vavgs : Intrinsic::hexagon_vavgu; + Value *NewIn = nullptr; + + // Split operands if they need more than a vector length. + if (NewEltSize * NumElts > HwVLen) { + unsigned HalfElts = NumElts / 2; + auto *ResType = + FixedVectorType::get(getElementTy(NewEltSize, IRB), HalfElts); + std::pair<Value *, Value *> SplitOP1 = opSplit(OP1, Inst, ResType); + std::pair<Value *, Value *> SplitOP2 = opSplit(OP2, Inst, ResType); + Value *NewHi = createIntrinsic(AvgIntID, Inst, SplitOP1.first, + SplitOP2.first, ResType, NumElts, false); + Value *NewLo = createIntrinsic(AvgIntID, Inst, SplitOP1.second, + SplitOP2.second, ResType, NumElts, false); + SmallVector<Constant *, 8> ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i)); + // Concat Hi and Lo. 
+ NewIn = + IRB.CreateShuffleVector(NewLo, NewHi, ConstantVector::get(ShuffleMask)); + } else + NewIn = + createIntrinsic(AvgIntID, Inst, OP1, OP2, AvgInstTy, NumElts, false); + + if (InstEltSize > NewEltSize) + // Extend the output to match the original instruction type. + NewIn = (IsSigned) ? IRB.CreateSExt(NewIn, InstTy) + : IRB.CreateZExt(NewIn, InstTy); + Inst->replaceAllUsesWith(NewIn); + return true; +} + +bool HexagonGenWideningVecInstr::visitBlock(BasicBlock *B) { + bool Changed = false; + for (auto &I : *B) { + Type *InstTy = I.getType(); + if (!InstTy->isVectorTy() || !HST->isTypeForHVX(cast<VectorType>(InstTy))) + continue; + + unsigned InstLen = InstTy->getPrimitiveSizeInBits(); + if (InstLen < HwVLen && !WidenShortVector) + continue; + + Changed |= processInstructionForVMPA(&I); + Changed |= genSaturatingInst(&I); + Changed |= genVAvg(&I); + } + // Generate widening instructions. + for (auto &I : *B) + Changed |= processInstruction(&I); + return Changed; +} + +bool HexagonGenWideningVecInstr::runOnFunction(Function &F) { + M = F.getParent(); + HST = TM->getSubtargetImpl(F); + + // Return if useHVX128BOps is not set. It can be enabled for 64B mode + // but will require some changes. For example, the bitcasts for intrinsics + // assume 128B mode. + if (skipFunction(F) || !HST->useHVX128BOps()) + return false; + + HwVLen = HST->getVectorLength() * 8; // Vector Length in bits + bool Changed = false; + for (auto &B : F) + Changed |= visitBlock(&B); + + return Changed; +} + +FunctionPass * +llvm::createHexagonGenWideningVecInstr(const HexagonTargetMachine &TM) { + return new HexagonGenWideningVecInstr(&TM); +} diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h index 0528cbd..683feb1 100644 --- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h +++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h @@ -50,10 +50,7 @@ public: const HexagonSubtarget &ST) : Resources(ST.createDFAPacketizer(II)), TII(HII) { } - ~HexagonHazardRecognizer() override { - if (Resources) - delete Resources; - } + ~HexagonHazardRecognizer() override { delete Resources; } /// This callback is invoked when a new block of instructions is about to be /// scheduled. The hazard state is set to an initialized state. 
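For readers following the pattern matching in this new pass, the per-lane arithmetic that genSaturatingInst and genVAvg recognize reduces to the scalar computations sketched below. This is an illustrative reference model only; the helper names are invented for this note and are not part of the patch.

#include <algorithm>
#include <cstdint>

// One lane of the saturating-shift pattern matched above:
//   trunc(smax(smin(ashr(x, n), MAX), MIN))
// with MIN/MAX equal to the signed 8-bit range, i.e. a 16-bit to 8-bit
// arithmetic shift right with signed saturation.
static int8_t vasr_sat_s16_to_s8(int16_t x, unsigned n) {
  int32_t v = int32_t(x) >> n;               // arithmetic shift right
  v = std::min(v, int32_t(INT8_MAX));        // clamp to the upper bound
  v = std::max(v, int32_t(INT8_MIN));        // clamp to the lower bound
  return int8_t(v);                          // truncation is now lossless
}

// One lane of the unsigned averaging pattern matched by genVAvg:
//   trunc((zext(a) + zext(b)) >> 1)
static uint8_t vavg_u8(uint8_t a, uint8_t b) {
  return uint8_t((uint16_t(a) + uint16_t(b)) >> 1);  // sum cannot overflow
}

The pass performs the same computation on whole HVX vectors through the hexagon_vasrsat_* and hexagon_vavgs/hexagon_vavgu intrinsics, splitting the operands when the result would need more than a vector pair.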
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 3cc146b..728ffef 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -811,8 +811,8 @@ ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); } MaskT vshuffvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) { int Len = Vu.size(); MaskT Vdd(2 * Len); - std::copy(Vv.begin(), Vv.end(), Vdd.begin()); - std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len); + llvm::copy(Vv, Vdd.begin()); + llvm::copy(Vu, Vdd.begin() + Len); auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len); auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len); @@ -831,8 +831,8 @@ MaskT vshuffvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) { MaskT vdealvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) { int Len = Vu.size(); MaskT Vdd(2 * Len); - std::copy(Vv.begin(), Vv.end(), Vdd.begin()); - std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len); + llvm::copy(Vv, Vdd.begin()); + llvm::copy(Vu, Vdd.begin() + Len); auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len); auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 526b4de..025e5b0 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1504,8 +1504,8 @@ HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op, HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, const HexagonSubtarget &ST) - : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)), - Subtarget(ST) { + : TargetLowering(TM, ST), + HTM(static_cast<const HexagonTargetMachine &>(TM)), Subtarget(ST) { auto &HRI = *Subtarget.getRegisterInfo(); setPrefLoopAlignment(Align(16)); @@ -1677,6 +1677,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, } // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::bf16, Expand); + setTruncStoreAction(MVT::f64, MVT::bf16, Expand); // Turn FP extload into load/fpextend. for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); @@ -1872,9 +1874,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_BF16, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); @@ -2107,7 +2115,7 @@ static Value *getUnderLyingObjectForBrevLdIntr(Value *V) { /// true and store the intrinsic information into the IntrinsicInfo that was /// passed to the function. 
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { @@ -2519,7 +2527,7 @@ HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values, // Make sure to always cast to IntTy. if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) { const ConstantInt *CI = CN->getConstantIntValue(); - Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue()); + Consts[i] = ConstantInt::getSigned(IntTy, CI->getValue().getSExtValue()); } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) { const ConstantFP *CF = CN->getConstantFPValue(); APInt A = CF->getValueAPF().bitcastToAPInt(); @@ -3948,3 +3956,51 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { return AtomicExpansionKind::LLSC; } + +bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial( + const Instruction &AndI) const { + // Only sink 'and' mask to cmp use block if it is masking a single bit since + // this will fold the and/cmp/br into a single tstbit instruction. + ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); + if (!Mask) + return false; + return Mask->getValue().isPowerOf2(); +} + +// Check if the result of the node is only used as a return value, as +// otherwise we can't perform a tail-call. +bool HexagonTargetLowering::isUsedByReturnOnly(SDNode *N, + SDValue &Chain) const { + if (N->getNumValues() != 1) + return false; + if (!N->hasNUsesOfValue(1, 0)) + return false; + + SDNode *Copy = *N->user_begin(); + + if (Copy->getOpcode() == ISD::BITCAST) { + return isUsedByReturnOnly(Copy, Chain); + } + + if (Copy->getOpcode() != ISD::CopyToReg) { + return false; + } + + // If the ISD::CopyToReg has a glue operand, we conservatively assume it + // isn't safe to perform a tail call. + if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) + return false; + + // The copy must be used by a HexagonISD::RET_GLUE, and nothing else. + bool HasRet = false; + for (SDNode *Node : Copy->users()) { + if (Node->getOpcode() != HexagonISD::RET_GLUE) + return false; + HasRet = true; + } + if (!HasRet) + return false; + + Chain = Copy->getOperand(0); + return true; +} diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 8d04edb..2d7e3c3 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -145,7 +145,7 @@ public: const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; @@ -160,6 +160,10 @@ public: bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; + /// Return true if an FMA operation is faster than a pair of mul and add /// instructions. 
fmuladd intrinsics will be expanded to FMAs when this /// method returns true (and FMAs are legal), otherwise fmuladd is @@ -588,6 +592,7 @@ private: SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const; SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; SDValue WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const; + SDValue WidenHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; SDValue LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const; SDValue ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) const; SDValue EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0573f64..4bc8e74 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -31,6 +31,10 @@ static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors")); +static cl::opt<bool> + EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), + cl::desc("Enable FP fast conversion routine.")); + static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; @@ -88,6 +92,10 @@ HexagonTargetLowering::initializeHVXLowering() { addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass); } + if (Subtarget.useHVXV81Ops()) { + addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass); + } } // Set up operation actions. 
@@ -162,6 +170,30 @@ HexagonTargetLowering::initializeHVXLowering() { setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); + if (Subtarget.useHVXV81Ops()) { + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV); + setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32); + setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32); + setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32); + setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32); + setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32); + setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32); + + setOperationAction(ISD::SPLAT_VECTOR, MVT::v64bf16, Legal); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64bf16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64bf16, Custom); + + setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom); + setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v64bf16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64bf16, Custom); + + setOperationAction(ISD::SPLAT_VECTOR, MVT::bf16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom); + } + for (MVT P : FloatW) { setOperationAction(ISD::LOAD, P, Custom); setOperationAction(ISD::STORE, P, Custom); @@ -438,6 +470,7 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::ANY_EXTEND, VecTy, Custom); setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom); setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, VecTy, Custom); if (Subtarget.useHVXFloatingPoint()) { setOperationAction(ISD::FP_TO_SINT, VecTy, Custom); setOperationAction(ISD::FP_TO_UINT, VecTy, Custom); @@ -462,6 +495,10 @@ HexagonTargetLowering::initializeHVXLowering() { unsigned HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const { + // Early exit for invalid input types + if (!VecTy.isVector()) + return ~0u; + MVT ElemTy = VecTy.getVectorElementType(); unsigned VecLen = VecTy.getVectorNumElements(); unsigned HwLen = Subtarget.getVectorLength(); @@ -1667,14 +1704,15 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is // not a legal type, just bitcast the node to use i16 // types and bitcast the result back to f16 - if (VecTy.getVectorElementType() == MVT::f16) { - SmallVector<SDValue,64> NewOps; + if (VecTy.getVectorElementType() == MVT::f16 || + VecTy.getVectorElementType() == MVT::bf16) { + SmallVector<SDValue, 64> NewOps; for (unsigned i = 0; i != Size; i++) NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i])); - SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl, - tyVector(VecTy, MVT::i16), NewOps); - return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); + SDValue T0 = + DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps); + return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0); } // First, split the BUILD_VECTOR for vector pairs. 
We could generate @@ -1698,7 +1736,7 @@ HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) MVT VecTy = ty(Op); MVT ArgTy = ty(Op.getOperand(0)); - if (ArgTy == MVT::f16) { + if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) { MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements()); SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0)); SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16); @@ -1831,12 +1869,12 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) if (ElemTy == MVT::i1) return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); - if (ElemTy == MVT::f16) { + if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) { SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, tyVector(VecTy, MVT::i16), DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV), DAG.getBitcast(MVT::i16, ValV), IdxV); - return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); + return DAG.getBitcast(tyVector(VecTy, ElemTy), T0); } return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); @@ -2334,6 +2372,25 @@ SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op, MVT VecTy = ty(Op); MVT ArgTy = ty(Op.getOperand(0)); const SDLoc &dl(Op); + + if (ArgTy == MVT::v64bf16) { + MVT HalfTy = typeSplit(VecTy).first; + SDValue BF16Vec = Op.getOperand(0); + SDValue Zeroes = + getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG); + // Interleave zero vector with the bf16 vector, with zeroes in the lower + // half of each 32 bit lane, effectively extending the bf16 values to fp32 + // values. + SDValue ShuffVec = + getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG); + VectorPair VecPair = opSplit(ShuffVec, dl, DAG); + SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy, + {VecPair.second, VecPair.first, + DAG.getSignedConstant(-4, dl, MVT::i32)}, + DAG); + return Result; + } + assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16); SDValue F16Vec = Op.getOperand(0); @@ -2918,6 +2975,32 @@ HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); assert(InpTy.changeTypeToInteger() == ResTy); + // At this point this is an experiment under a flag. + // In arch before V81 the rounding mode is towards nearest value. + // The C/C++ standard requires rounding towards zero: + // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a + // finite value of real floating type is converted to an integer type, the + // fractional part is discarded (i.e., the value is truncated toward zero)." + // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a + // floating-point type can be converted to a prvalue of an integer type. The + // conversion truncates; that is, the fractional part is discarded." 
+ if (InpTy == MVT::v64f16) { + if (Subtarget.useHVXV81Ops()) { + // This is c/c++ compliant + SDValue ConvVec = + getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG); + return ConvVec; + } else if (EnableFpFastConvert) { + // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf + SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG); + return ConvVec; + } + } else if (EnableFpFastConvert && InpTy == MVT::v32f32) { + // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf + SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG); + return ConvVec; + } + // int32_t conv_f32_to_i32(uint32_t inp) { // // s | exp8 | frac23 // @@ -3351,6 +3434,104 @@ HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const { {SetCC, getZero(dl, MVT::i32, DAG)}); } +SDValue HexagonTargetLowering::WidenHvxIntrinsic(SDValue Op, + SelectionDAG &DAG) const { + const SDLoc &dl(Op); + unsigned HwWidth = 8 * Subtarget.getVectorLength(); + bool IsResInterleaved = false; + + SDValue WideRes = SDValue(); + SDValue Op1 = Op.getOperand(1); + MVT ResTy = ty(Op); + MVT OpTy = ty(Op1); + if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy)) + return SDValue(); + + auto getFactor = [HwWidth](MVT Ty) { + unsigned Width = Ty.getSizeInBits(); + assert(HwWidth % Width == 0); + return HwWidth / Width; + }; + + auto getWideTy = [getFactor](MVT Ty) { + unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); + return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); + }; + + unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue Op2 = Op.getOperand(2); + SDValue WideOp1 = appendUndef(Op1, getWideTy(OpTy), DAG); + SDValue WideOp2; + if (dyn_cast<const ConstantSDNode>(Op2.getNode())) { + WideOp2 = Op2; + } else { + WideOp2 = appendUndef(Op2, getWideTy(OpTy), DAG); + } + unsigned WidenFactor = getFactor(OpTy); + unsigned WideLen = ResTy.getVectorNumElements() * WidenFactor; + MVT WideResTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideLen); + + switch (IID) { + default: + return SDValue(); + case Intrinsic::hexagon_vasrsat_su: + case Intrinsic::hexagon_vasrsat_uu: + case Intrinsic::hexagon_vasrsat_ss: + WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy, + DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2, + Op.getOperand(3)); + break; + case Intrinsic::hexagon_vadd_su: + case Intrinsic::hexagon_vadd_uu: + case Intrinsic::hexagon_vadd_ss: + case Intrinsic::hexagon_vadd_us: + + case Intrinsic::hexagon_vsub_su: + case Intrinsic::hexagon_vsub_uu: + case Intrinsic::hexagon_vsub_ss: + case Intrinsic::hexagon_vsub_us: + + case Intrinsic::hexagon_vmpy_su: + case Intrinsic::hexagon_vmpy_uu: + case Intrinsic::hexagon_vmpy_ss: + case Intrinsic::hexagon_vmpy_us: + case Intrinsic::hexagon_vmpy_ub_ub: + case Intrinsic::hexagon_vmpy_ub_b: + case Intrinsic::hexagon_vmpy_uh_uh: + case Intrinsic::hexagon_vmpy_h_h: + IsResInterleaved = true; + WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy, + DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2); + break; + case Intrinsic::hexagon_vavgu: + case Intrinsic::hexagon_vavgs: + WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy, + DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2); + break; + } + unsigned OrigLen = ResTy.getVectorNumElements(); + assert(OrigLen % 2 == 0); + unsigned HalfOrigLen = OrigLen / 2; + unsigned SplitLen = WideLen / 2; + if (IsResInterleaved) { + // Get the valid odd and even elements from the widened vector-pair while + // maintaining their 
deinterleaved order. The following shuffle_vector will + // produce a vector-pair with all the valid elements (even followed by odd) + // accumulated together followed by undefs. + SmallVector<int, 128> ShuffV; + for (unsigned j = 0; j < WidenFactor; j++) { + for (unsigned i = 0; i < HalfOrigLen; i++) + ShuffV.push_back(j * HalfOrigLen + i); + for (unsigned i = 0; i < HalfOrigLen; i++) + ShuffV.push_back(SplitLen + j * HalfOrigLen + i); + } + WideRes = DAG.getVectorShuffle(WideResTy, dl, WideRes, + DAG.getUNDEF(WideResTy), ShuffV); + } + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy, + {WideRes, getZero(dl, MVT::i32, DAG)}); +} + SDValue HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); @@ -3617,6 +3798,12 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, Results.push_back(S); } break; + case ISD::INTRINSIC_WO_CHAIN: + if (shouldWidenToHvx(ty(Op.getOperand(1)), DAG)) { + if (SDValue T = WidenHvxIntrinsic(Op, DAG)) + Results.push_back(T); + } + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FP_TO_SINT: @@ -3677,6 +3864,11 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, Results.push_back(C); } break; + case ISD::INTRINSIC_WO_CHAIN: + assert(shouldWidenToHvx(ty(N->getOperand(1)), DAG) && "Not widening?"); + if (SDValue T = WidenHvxIntrinsic(Op, DAG)) + Results.push_back(T); + break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) { diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 47726d6..7682af4 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -118,9 +118,9 @@ const int Hexagon_ADDI_OFFSET_MIN = -32768; void HexagonInstrInfo::anchor() {} HexagonInstrInfo::HexagonInstrInfo(const HexagonSubtarget &ST) - : HexagonGenInstrInfo(ST, Hexagon::ADJCALLSTACKDOWN, + : HexagonGenInstrInfo(ST, RegInfo, Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), - Subtarget(ST) {} + RegInfo(ST.getHwMode()), Subtarget(ST) {} namespace llvm { namespace HexagonFUnits { @@ -964,7 +964,6 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL = MBB.findDebugLoc(I); @@ -1009,10 +1008,12 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } } -void HexagonInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -4753,6 +4754,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const { return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0); } +bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const { + return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf || + MI->getOpcode() 
== Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32); +} + // Addressing mode relations. short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const { return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index c17e527..796b978 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -23,6 +23,8 @@ #include <cstdint> #include <vector> +#include "HexagonRegisterInfo.h" + #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -36,6 +38,7 @@ class MachineOperand; class TargetRegisterInfo; class HexagonInstrInfo : public HexagonGenInstrInfo { + const HexagonRegisterInfo RegInfo; const HexagonSubtarget &Subtarget; enum BundleAttribute { @@ -47,6 +50,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { public: explicit HexagonInstrInfo(const HexagonSubtarget &ST); + const HexagonRegisterInfo &getRegisterInfo() const { return RegInfo; } + /// TargetInstrInfo overrides. /// If the specified machine instruction is a direct @@ -183,8 +188,7 @@ public: /// is true, the register operand is the last use and must be marked kill. void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; /// Load the specified register of the given register class from the specified @@ -193,7 +197,7 @@ public: void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; /// This function is called for all pseudo instructions @@ -532,6 +536,7 @@ public: } MCInst getNop() const override; + bool isQFPMul(const MachineInstr *MF) const; }; /// \brief Create RegSubRegPair from a register MachineOperand diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index 25b81d8..7f16c3e 100644 --- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -355,6 +355,120 @@ defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignbi>; defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignb>; defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignbi>; + +class VAccGenIntrin_pat<InstHexagon MI, Intrinsic IntID, + ValueType ResType, PatFrag VPred, PatFrag WPred> + : Pat<(add WPred:$Vx, (ResType (IntID VPred:$Vs, VPred:$Vt))), + (MI WPred:$Vx, VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>; + +let AddedComplexity = 100 in { + def : VAccGenIntrin_pat<V6_vmpybv_acc, + int_hexagon_vmpy_ss, VecPI16, HVI8, HWI16>; + def : VAccGenIntrin_pat<V6_vmpyubv_acc, + int_hexagon_vmpy_uu, VecPI16, HVI8, HWI16>; + def : VAccGenIntrin_pat<V6_vmpyhv_acc, + int_hexagon_vmpy_ss, VecPI32, HVI16, HWI32>; + def : VAccGenIntrin_pat<V6_vmpyuhv_acc, + int_hexagon_vmpy_uu, VecPI32, HVI16, HWI32>; + + // The second 
operand in V6_vmpybusv_acc is unsigned. + def : Pat<(add HWI16:$Vx, (VecPI16 (int_hexagon_vmpy_us HVI8:$Vs, + HVI8:$Vv))), + (V6_vmpybusv_acc HWI16:$Vx, HVI8:$Vs, HVI8:$Vv)>; + + def : Pat<(add HWI16:$Vx, (VecPI16 (int_hexagon_vmpy_su HVI8:$Vs, + HVI8:$Vv))), + (V6_vmpybusv_acc HWI16:$Vx, HVI8:$Vv, HVI8:$Vs)>; + + // The third operand in V6_vmpyhus_acc is unsigned. + def : Pat<(add HWI32:$Vx, (VecPI32 (int_hexagon_vmpy_us HVI16:$Vs, + HVI16:$Vv))), + (V6_vmpyhus_acc HWI32:$Vx, HVI16:$Vv, HVI16:$Vs)>; + + def : Pat<(add HWI32:$Vx, (VecPI32 (int_hexagon_vmpy_su HVI16:$Vs, + HVI16:$Vv))), + (V6_vmpyhus_acc HWI32:$Vx, HVI16:$Vs, HVI16:$Vv)>; +} + +class ExtIntrin_pat<InstHexagon MI, Intrinsic IntID, + ValueType ResType, PatFrag VPred> + : Pat<(ResType (IntID VPred:$Vs, VPred:$Vt)), + (MI VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>; + +def : ExtIntrin_pat<V6_vaddubh, int_hexagon_vadd_uu, VecPI16, HVI8>; +def : ExtIntrin_pat<V6_vadduhw, int_hexagon_vadd_uu, VecPI32, HVI16>; +def : ExtIntrin_pat<V6_vaddhw, int_hexagon_vadd_ss, VecPI32, HVI16>; + +def : ExtIntrin_pat<V6_vsububh, int_hexagon_vsub_uu, VecPI16, HVI8>; +def : ExtIntrin_pat<V6_vsubuhw, int_hexagon_vsub_uu, VecPI32, HVI16>; +def : ExtIntrin_pat<V6_vsubhw, int_hexagon_vsub_ss, VecPI32, HVI16>; + +def : ExtIntrin_pat<V6_vmpybv, int_hexagon_vmpy_ss, VecPI16, HVI8>; +def : ExtIntrin_pat<V6_vmpyhv, int_hexagon_vmpy_ss, VecPI32, HVI16>; +def : ExtIntrin_pat<V6_vmpyubv, int_hexagon_vmpy_uu, VecPI16, HVI8>; +def : ExtIntrin_pat<V6_vmpyuhv, int_hexagon_vmpy_uu, VecPI32, HVI16>; + +// The first operand in V6_vmpybusv is unsigned. +def : Pat<(VecPI16 (int_hexagon_vmpy_us HVI8:$Vs, HVI8:$Vv)), + (V6_vmpybusv HVI8:$Vs, HVI8:$Vv)>; + +def : Pat<(VecPI16 (int_hexagon_vmpy_su HVI8:$Vs, HVI8:$Vv)), + (V6_vmpybusv HVI8:$Vv, HVI8:$Vs)>; + +// The second operand in V6_vmpyhus is unsigned. 
+def : Pat<(VecPI32 (int_hexagon_vmpy_us HVI16:$Vs, HVI16:$Vv)), + (V6_vmpyhus HVI16:$Vv, HVI16:$Vs)>; + +def : Pat<(VecPI32 (int_hexagon_vmpy_su HVI16:$Vs, HVI16:$Vv)), + (V6_vmpyhus HVI16:$Vs, HVI16:$Vv)>; + +class VAvgInstr_pat<InstHexagon MI, Intrinsic IntID, + ValueType ResType, PatFrag VPred> + : Pat<(ResType (IntID VPred:$Vs, VPred:$Vt)), + (MI VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>; + +def : VAvgInstr_pat<V6_vavgub, int_hexagon_vavgu, VecI8, HVI8>; +def : VAvgInstr_pat<V6_vavgb, int_hexagon_vavgs, VecI8, HVI8>; +def : VAvgInstr_pat<V6_vavguh, int_hexagon_vavgu, VecI16, HVI16>; +def : VAvgInstr_pat<V6_vavgh, int_hexagon_vavgs, VecI16, HVI16>; +def : VAvgInstr_pat<V6_vavguw, int_hexagon_vavgu, VecI32, HVI32>; +def : VAvgInstr_pat<V6_vavgw, int_hexagon_vavgs, VecI32, HVI32>; + +class VAsrIntr_pat<InstHexagon MI, Intrinsic IntID, + ValueType ResType, PatFrag VPred> +: Pat<(ResType (IntID VPred:$Vs, VPred:$Vt, IntRegsLow8:$Rt)), + (MI VPred:$Vs, VPred:$Vt, IntRegsLow8:$Rt)>, Requires<[UseHVX128B]>; + +def : VAsrIntr_pat<V6_vasruhubsat, int_hexagon_vasrsat_uu, VecI8, HVI16>; +def : VAsrIntr_pat<V6_vasrhubsat, int_hexagon_vasrsat_su, VecI8, HVI16>; +def : VAsrIntr_pat<V6_vasrhbsat, int_hexagon_vasrsat_ss, VecI8, HVI16>; +def : VAsrIntr_pat<V6_vasruwuhsat, int_hexagon_vasrsat_uu, VecI16, HVI32>; +def : VAsrIntr_pat<V6_vasrwuhsat, int_hexagon_vasrsat_su, VecI16, HVI32>; +def : VAsrIntr_pat<V6_vasrwhsat, int_hexagon_vasrsat_ss, VecI16, HVI32>; + +class VMpyVSInstr_pat<InstHexagon MI, Intrinsic IntID, + ValueType ResType, PatFrag VPred> +: Pat<(ResType (IntID VPred:$Vs, IntRegs:$Rt)), + (MI VPred:$Vs, IntRegs:$Rt)>, Requires<[UseHVX128B]>; + +def : VMpyVSInstr_pat<V6_vmpyub, int_hexagon_vmpy_ub_ub, VecPI16, HVI8>; +def : VMpyVSInstr_pat<V6_vmpybus, int_hexagon_vmpy_ub_b, VecPI16, HVI8>; +def : VMpyVSInstr_pat<V6_vmpyuh, int_hexagon_vmpy_uh_uh, VecPI32, HVI16>; +def : VMpyVSInstr_pat<V6_vmpyh, int_hexagon_vmpy_h_h, VecPI32, HVI16>; + +class VAccIntrin_pat<InstHexagon MI, Intrinsic IntID> + : Pat<(add HvxWR:$Vx, (IntID HvxVR:$Vs, HvxVR:$Vt)), + (MI HvxWR:$Vx, HvxVR:$Vs, HvxVR:$Vt)>, Requires<[UseHVX128B]>; + +let AddedComplexity = 350 in { + def : VAccIntrin_pat<V6_vmpybv_acc, int_hexagon_V6_vmpybv_128B>; + def : VAccIntrin_pat<V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_128B>; + def : VAccIntrin_pat<V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_128B>; + def : VAccIntrin_pat<V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_128B>; + def : VAccIntrin_pat<V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_128B>; + def : VAccIntrin_pat<V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_128B>; +} + def: Pat<(int_hexagon_V6_vd0), (V6_vd0)>, Requires<[UseHVXV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vd0_128B ), diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp index 7cbd81f..54969b2 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp @@ -646,7 +646,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, MachineInstr *CombI; if (Acc != 0) { const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); - const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0); Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc); NG.push_back(TfrI); @@ -677,7 +677,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, } else 
{ // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); - const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0); Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc)); NG.push_back(TfrI); diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 5a1d5bc..c68b632 100644 --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -138,7 +138,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, return false; // Make sure that the (unique) def operand is a register from IntRegs. - bool HadDef = false; + [[maybe_unused]] bool HadDef = false; for (const MachineOperand &Op : II->operands()) { if (!Op.isReg() || !Op.isDef()) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 6dd83c1..53afbc4 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -107,7 +107,7 @@ private: bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr &MI, const NodeList &UNodeList); bool isSafeToExtLR(NodeAddr<StmtNode *> SN, MachineInstr *MI, - unsigned LRExtReg, const NodeList &UNodeList); + Register LRExtReg, const NodeList &UNodeList); void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList); bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList); short getBaseWithLongOffset(const MachineInstr &MI) const; @@ -177,7 +177,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, NodeId OffsetRegRD = 0; for (NodeAddr<UseNode *> UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) { RegisterRef RR = UA.Addr->getRegRef(*DFG); - if (OffsetReg == RR.Reg) { + if (OffsetReg == RR.asMCReg()) { OffsetRR = RR; OffsetRegRD = UA.Addr->getReachingDef(); } @@ -198,7 +198,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, // Reaching Def to an offset register can't be a phi. if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) && MI.getParent() != UseMI.getParent()) - return false; + return false; const MCInstrDesc &UseMID = UseMI.getDesc(); if ((!UseMID.mayLoad() && !UseMID.mayStore()) || @@ -300,7 +300,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA, } bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN, - MachineInstr *MI, unsigned LRExtReg, + MachineInstr *MI, Register LRExtReg, const NodeList &UNodeList) { RegisterRef LRExtRR; NodeId LRExtRegRD = 0; @@ -308,7 +308,7 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN, // for the LRExtReg. for (NodeAddr<UseNode *> UA : SN.Addr->members_if(DFG->IsUse, *DFG)) { RegisterRef RR = UA.Addr->getRegRef(*DFG); - if (LRExtReg == RR.Reg) { + if (LRExtReg == RR.asMCReg()) { LRExtRR = RR; LRExtRegRD = UA.Addr->getReachingDef(); } @@ -552,7 +552,7 @@ bool HexagonOptAddrMode::processAddBases(NodeAddr<StmtNode *> AddSN, // Find the UseNode that contains the base register and it's reachingDef for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) { RegisterRef URR = UA.Addr->getRegRef(*DFG); - if (BaseReg != URR.Reg) + if (BaseReg != URR.asMCReg()) continue; UAReachingDefID = UA.Addr->getReachingDef(); @@ -740,7 +740,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN, // for the LRExtReg. 
for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) { RegisterRef RR = UA.Addr->getRegRef(*DFG); - if (BaseReg == RR.Reg) + if (BaseReg == RR.asMCReg()) LRExtRegRD = UA.Addr->getReachingDef(); } diff --git a/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp b/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp new file mode 100644 index 0000000..fcfae17 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp @@ -0,0 +1,713 @@ +//===---------------------- HexagonOptShuffleVector.cpp -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Optimize vector shuffles by postponing them as late as possible. The intent +// here is to remove unnecessary shuffles and also increase the opportunities +// for adjacent shuffles to be merged together. +// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "hex-shuff-vec" +/// A command line argument to limit the search space along the def chain. +static cl::opt<int> MaxDefSearchCount( + "shuffvec-max-search-count", + cl::desc("Maximum number of instructions traversed along def chain."), + cl::Hidden, cl::init(15)); + +#ifndef NDEBUG +static cl::opt<int> + ShuffVecLimit("shuff-vec-max", + cl::desc("Maximum number of shuffles to be relocated."), + cl::Hidden, cl::init(-1)); +#endif + +namespace llvm { +void initializeHexagonOptShuffleVectorPass(PassRegistry &); +FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &); +} // end namespace llvm + +namespace { + +class HexagonOptShuffleVector : public FunctionPass { +public: + static char ID; +#ifndef NDEBUG + static int NumRelocated; +#endif + HexagonOptShuffleVector() : FunctionPass(ID) { + initializeHexagonOptShuffleVectorPass(*PassRegistry::getPassRegistry()); + } + + HexagonOptShuffleVector(const HexagonTargetMachine *TM) + : FunctionPass(ID), TM(TM) { + initializeHexagonOptShuffleVectorPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Hexagon Optimize Vector Shuffles"; + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + } + +private: + using ValueVector = SmallVector<Value *, 8>; + const HexagonTargetMachine *TM = nullptr; + const HexagonSubtarget *HST = nullptr; + SmallPtrSet<Instruction *, 8> Visited; + using ShuffUseList = + SmallDenseMap<Instruction *, SmallVector<Instruction *, 2>>; + ShuffUseList ShuffUses; + int DefSearchCount; + + bool visitBlock(BasicBlock *B); + bool findNewShuffLoc(Instruction *I, ArrayRef<int> &ShuffMask, + Value *&NewLoc); + bool isValidIntrinsic(IntrinsicInst *I); + bool relocateShuffVec(Instruction *I, ArrayRef<int> &M, Value *NewLoc, + std::list<Instruction 
*> &WorkList); + bool getUseList(Instruction *I, ValueVector &UseList); + bool analyzeHiLoUse(Instruction *HI, Instruction *LO, + ArrayRef<int> &ShuffMask, Value *&NewLoc, + ShuffUseList &CurShuffUses); + bool isHILo(Value *V, bool IsHI); + bool hasDefWithSameShuffMask(Value *V, SmallVector<Instruction *, 2> &ImmUse, + ArrayRef<int> &ShuffMask, + ShuffUseList &CurShuffUses); + void FindHiLoUse(ValueVector &UseList, Instruction *&HI, Instruction *&LO); + bool isConcatMask(ArrayRef<int> &Mask, Instruction *ShuffInst); + bool isValidUseInstr(ValueVector &UseList, Instruction *&UI); + bool areAllOperandsValid(Instruction *I, Instruction *UI, + ArrayRef<int> &ShuffMask, + ShuffUseList &CurShuffUses); + Value *getOperand(Instruction *I, unsigned i); + static iterator_range<User::op_iterator> getArgOperands(User *U); + static std::pair<Value *, Value *> stripCasts(Value *V); + static bool isConstantVectorSplat(Value *V); +}; + +} // end anonymous namespace + +#ifndef NDEBUG +int HexagonOptShuffleVector::NumRelocated = 0; +#endif +char HexagonOptShuffleVector::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonOptShuffleVector, "shuff-vec", + "Hexagon Optimize Shuffle Vector", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonOptShuffleVector, "shuff-vec", + "Hexagon Optimize Shuffle Vector", false, false) + +bool HexagonOptShuffleVector::isConcatMask(ArrayRef<int> &Mask, + Instruction *ShuffInst) { + Type *ShuffTy = ShuffInst->getType(); + int NumElts = cast<FixedVectorType>(ShuffTy)->getNumElements(); + for (int i = 0; i < NumElts; i++) { + if (Mask[i] != i) + return false; + } + return true; +} + +bool HexagonOptShuffleVector::isValidIntrinsic(IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + default: + return false; + case Intrinsic::hexagon_V6_vaddubh_128B: + case Intrinsic::hexagon_V6_vadduhw_128B: + case Intrinsic::hexagon_V6_vaddhw_128B: + case Intrinsic::hexagon_V6_vaddh_dv_128B: + case Intrinsic::hexagon_V6_vsububh_128B: + case Intrinsic::hexagon_V6_vsubuhw_128B: + case Intrinsic::hexagon_V6_vsubhw_128B: + case Intrinsic::hexagon_V6_vsubh_dv_128B: + case Intrinsic::hexagon_V6_vmpyubv_128B: + case Intrinsic::hexagon_V6_vmpybv_128B: + case Intrinsic::hexagon_V6_vmpyuhv_128B: + case Intrinsic::hexagon_V6_vmpyhv_128B: + case Intrinsic::hexagon_V6_vmpybusv_128B: + case Intrinsic::hexagon_V6_vmpyhus_128B: + case Intrinsic::hexagon_V6_vavgb_128B: + case Intrinsic::hexagon_V6_vavgub_128B: + case Intrinsic::hexagon_V6_vavgh_128B: + case Intrinsic::hexagon_V6_vavguh_128B: + case Intrinsic::hexagon_V6_vavgw_128B: + case Intrinsic::hexagon_V6_vavguw_128B: + case Intrinsic::hexagon_V6_hi_128B: + case Intrinsic::hexagon_V6_lo_128B: + case Intrinsic::sadd_sat: + case Intrinsic::uadd_sat: + // Generic hexagon vector intrinsics + case Intrinsic::hexagon_vadd_su: + case Intrinsic::hexagon_vadd_uu: + case Intrinsic::hexagon_vadd_ss: + case Intrinsic::hexagon_vadd_us: + case Intrinsic::hexagon_vsub_su: + case Intrinsic::hexagon_vsub_uu: + case Intrinsic::hexagon_vsub_ss: + case Intrinsic::hexagon_vsub_us: + case Intrinsic::hexagon_vmpy_su: + case Intrinsic::hexagon_vmpy_uu: + case Intrinsic::hexagon_vmpy_ss: + case Intrinsic::hexagon_vmpy_us: + case Intrinsic::hexagon_vavgu: + case Intrinsic::hexagon_vavgs: + case Intrinsic::hexagon_vmpy_ub_b: + case Intrinsic::hexagon_vmpy_ub_ub: + case Intrinsic::hexagon_vmpy_uh_uh: + case Intrinsic::hexagon_vmpy_h_h: + return true; + } + llvm_unreachable("Unsupported instruction!"); +} + +bool 
HexagonOptShuffleVector::getUseList(Instruction *I, ValueVector &UseList) { + for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { + Instruction *J = dyn_cast<Instruction>(*UI); + if (!J) + return false; + if (auto *C = dyn_cast<CastInst>(*UI)) { + if (!getUseList(C, UseList)) + return false; + } else + UseList.push_back(*UI); + ++UI; + } + return true; +} + +bool HexagonOptShuffleVector::isHILo(Value *V, bool IsHI) { + if (!(dyn_cast<Instruction>(V))) + return false; + Instruction *I = dyn_cast<Instruction>(V); + if (!isa<CallInst>(I)) + return false; + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + if (!II) + return false; + if ((II->getIntrinsicID() == Intrinsic::hexagon_V6_hi_128B && IsHI) || + (II->getIntrinsicID() == Intrinsic::hexagon_V6_lo_128B && !IsHI)) + return true; + return false; +} + +Value *HexagonOptShuffleVector::getOperand(Instruction *I, unsigned i) { + Value *V = I->getOperand(i); + if (auto *C = dyn_cast<CastInst>(V)) + return C->getOperand(0); + return V; +} + +iterator_range<User::op_iterator> +HexagonOptShuffleVector::getArgOperands(User *U) { + if (auto *CB = dyn_cast<CallBase>(U)) + return CB->args(); + return U->operands(); +} + +// Strip out all the cast operations to find the first non-cast definition of a +// value. The function also returns the last cast operation in the def-chain. +std::pair<Value *, Value *> HexagonOptShuffleVector::stripCasts(Value *V) { + Value *LastCast = nullptr; + while (auto *C = dyn_cast<CastInst>(V)) { + LastCast = V; + V = C->getOperand(0); + } + return std::make_pair(V, LastCast); +} + +bool HexagonOptShuffleVector::isConstantVectorSplat(Value *V) { + if (auto *CV = dyn_cast<ConstantVector>(V)) + return CV->getSplatValue(); + if (auto *CV = dyn_cast<ConstantDataVector>(V)) + return CV->isSplat(); + return false; +} + +// Make sure all the operations on HI and LO counterparts are identical +// until both halves are merged together. When a merge point (concat) +// is found, set it as 'NewLoc' and return. +bool HexagonOptShuffleVector::analyzeHiLoUse(Instruction *HI, Instruction *LO, + ArrayRef<int> &ShuffMask, + Value *&NewLoc, + ShuffUseList &CurShuffUses) { + ValueVector HiUseList, LoUseList; + getUseList(HI, HiUseList); + getUseList(LO, LoUseList); + + // To keep the analysis simple, only handle Hi and Lo with a single use. + // Also, it is not even clear at this point whether it would be profitable + // due to multiple merge points. + if (HiUseList.size() != 1 || LoUseList.size() != 1) + return false; + + Instruction *HiUse = dyn_cast<Instruction>(HiUseList[0]); + Instruction *LoUse = dyn_cast<Instruction>(LoUseList[0]); + if (!HiUse || !LoUse) + return false; + + bool IsUseIntrinsic = false; + if (isa<CallInst>(HiUse)) { + if (!isa<CallInst>(LoUse)) + return false; + // Continue only if both Hi and Lo uses are calls to the same intrinsic. + IntrinsicInst *HiUseII = dyn_cast<IntrinsicInst>(HiUse); + IntrinsicInst *LoUseII = dyn_cast<IntrinsicInst>(LoUse); + if (!HiUseII || !LoUseII || + HiUseII->getIntrinsicID() != LoUseII->getIntrinsicID() || + !isValidIntrinsic(HiUseII)) + return false; + IsUseIntrinsic = true; + HiUse = HiUseII; + LoUse = LoUseII; + } + if (HiUse->getOpcode() != LoUse->getOpcode()) + return false; + + // If both the Hi and Lo uses are the same and it is a concat operation, + // set it as 'NewLoc'. + if (HiUse == LoUse) { + // Return true if use is a concat of Hi and Lo. 
+ ArrayRef<int> M; + if (match(HiUse, (m_Shuffle(m_Value(), m_Value(), m_Mask(M))))) { + if (isConcatMask(M, HiUse)) { + NewLoc = HiUse; + return true; + } + } + return false; + } + + // Check if HiUse and LoUse are shuffles with the same mask. If so, it is safe to + // continue the search. + ArrayRef<int> M1, M2; + if (match(HiUse, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M1)))) && + match(LoUse, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M2)))) && + M1.equals(M2)) + return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses); + + // For now, only handling binary ops and some of the intrinsics + // which appear to be safe (hardcoded in isValidIntrinsic()). + if (!HiUse->isBinaryOp() && !IsUseIntrinsic) + return false; + + ValueVector HiUseOperands, LoUseOperands; + int HiOpNum = -1, LoOpNum = -1; + for (unsigned i = 0; i < HiUse->getNumOperands(); i++) { + Value *V = getOperand(HiUse, i); + if (V == HI) + HiOpNum = i; + else + HiUseOperands.push_back(V); + } + for (unsigned i = 0; i < LoUse->getNumOperands(); i++) { + Value *V = getOperand(LoUse, i); + if (V == LO) + LoOpNum = i; + else + LoUseOperands.push_back(V); + } + + // Enforcing strict ordering which is not necessary in case of + // commutative operations and may be relaxed in the future if needed. + if (HiOpNum < 0 || HiOpNum != LoOpNum || + LoUseOperands.size() != HiUseOperands.size()) + return false; + + unsigned NumOperands = HiUseOperands.size(); + for (unsigned i = 0; i < NumOperands; i++) { + if (HiUseOperands[i] == LoUseOperands[i]) + continue; + // Only handle the case where other operands to Hi and Lo uses + // are coming from another Hi and Lo pair. + if (!isHILo(HiUseOperands[i], true) || !isHILo(LoUseOperands[i], false)) + return false; + + Value *DefHiUse = dyn_cast<Instruction>(HiUseOperands[i])->getOperand(0); + Value *DefLoUse = dyn_cast<Instruction>(LoUseOperands[i])->getOperand(0); + if (!DefHiUse || DefHiUse != DefLoUse) + return false; + SmallVector<Instruction *, 2> ImmUseList; + if (dyn_cast<CastInst>(DefHiUse)) + ImmUseList.push_back(dyn_cast<Instruction>(DefHiUse)); + else { + ImmUseList.push_back(HiUse); + ImmUseList.push_back(LoUse); + } + + // Make sure that the Hi/Lo def has the same shuffle mask. + if (!hasDefWithSameShuffMask(DefHiUse, ImmUseList, ShuffMask, CurShuffUses)) + return false; + } + + // Continue the search along the Hi/Lo use-chain. + return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses); +} + +bool HexagonOptShuffleVector::hasDefWithSameShuffMask( + Value *V, SmallVector<Instruction *, 2> &ImmUses, ArrayRef<int> &ShuffMask, + ShuffUseList &CurShuffUses) { + // Follow the def-chain until we have found a shuffle_vector or have run out + // of the maximum number of attempts.
+ if (DefSearchCount >= MaxDefSearchCount) + return false; + + ++DefSearchCount; + V = stripCasts(V).first; + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + bool Found = true; + ArrayRef<int> M; + if (match(V, (m_Shuffle(m_Value(), m_Value(), m_Mask(M)))) && + M.equals(ShuffMask)) { + CurShuffUses[I] = ImmUses; + return true; + } + if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()), + m_Poison(), m_ZeroMask())))) + return true; // scalar converted to a vector + + auto *II = dyn_cast<IntrinsicInst>(I); + if (!I->isBinaryOp() && (!II || !isValidIntrinsic(II))) + return false; + + for (Value *OpV : getArgOperands(I)) { + std::pair<Value *, Value *> P = stripCasts(OpV); + OpV = P.first; + + SmallVector<Instruction *, 2> ImmUseList; + if (P.second) + ImmUseList.push_back(dyn_cast<Instruction>(P.second)); + else + ImmUseList.push_back(dyn_cast<Instruction>(I)); + + if (isa<PoisonValue>(OpV)) + continue; + if (isConstantVectorSplat(OpV)) + continue; + if (!dyn_cast<Instruction>(OpV)) + return false; + if ((match(OpV, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()), + m_Poison(), m_ZeroMask())))) + continue; + Found &= hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses); + } + return Found; +} + +void HexagonOptShuffleVector::FindHiLoUse(ValueVector &UseList, + Instruction *&HI, Instruction *&LO) { + + for (unsigned i = 0; i < UseList.size(); i++) { + auto *J = dyn_cast<Instruction>(UseList[i]); + auto *CI = dyn_cast<CallInst>(J); + if (CI) { + auto *II = dyn_cast<IntrinsicInst>(CI); + if (II) { + Intrinsic::ID IntID = II->getIntrinsicID(); + if (IntID == Intrinsic::hexagon_V6_hi_128B) + HI = J; + if (IntID == Intrinsic::hexagon_V6_lo_128B) + LO = J; + } + } + } +} + +bool HexagonOptShuffleVector::isValidUseInstr(ValueVector &UseList, + Instruction *&UI) { + // Don't allow multiple uses. Only done in case of a Hi/Lo pair. + if (UseList.size() != 1) + return false; + UI = dyn_cast<Instruction>(UseList[0]); + if (!UI) + return false; + // Should be either a binary op or one of the supported instrinsics. + if (auto *CI = dyn_cast<CallInst>(UI)) { + auto *II = dyn_cast<IntrinsicInst>(CI); + if (!II || !isValidIntrinsic(II)) + return false; + UI = II; + } else if (!UI->isBinaryOp()) + return false; + return true; +} + +// Check all the operands of 'Use' to make sure that they are either: +// 1) a constant +// 2) a scalar +// 3) a constant vector +// 4) a vector using the same mask as I +bool HexagonOptShuffleVector::areAllOperandsValid(Instruction *I, + Instruction *Use, + ArrayRef<int> &ShuffMask, + ShuffUseList &CurShuffUses) { + bool AllOperandsOK = true; + for (Value *OpV : getArgOperands(Use)) { + bool HasOneUse = OpV->hasOneUse(); + std::pair<Value *, Value *> P = stripCasts(OpV); + OpV = P.first; + + SmallVector<Instruction *, 2> ImmUseList; + if (P.second) + ImmUseList.push_back(dyn_cast<Instruction>(P.second)); + else + ImmUseList.push_back(dyn_cast<Instruction>(Use)); + + if (OpV == I || isa<PoisonValue>(OpV)) + continue; + if (isConstantVectorSplat(OpV)) + continue; + if (!dyn_cast<Instruction>(OpV) || !HasOneUse) + return false; + + if ((match(OpV, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()), + m_Poison(), m_ZeroMask())))) + continue; + AllOperandsOK &= + hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses); + } + return AllOperandsOK; +} + +// Find the new location where it's safe to relocate shuffle instruction 'I'. 
+bool HexagonOptShuffleVector::findNewShuffLoc(Instruction *I, + ArrayRef<int> &ShuffMask, + Value *&NewLoc) { + DefSearchCount = 0; + ValueVector UseList; + if (!getUseList(I, UseList)) + return false; + + using ShuffUseList = + SmallDenseMap<Instruction *, SmallVector<Instruction *, 2>>; + ShuffUseList CurShuffUses; + // Check for Hi and Lo pair. + Instruction *HI = nullptr, *LO = nullptr; + FindHiLoUse(UseList, HI, LO); + if (UseList.size() == 2 && HI && LO) { + // If 'I' has Hi and Lo use-pair, then it can be relocated only after Hi/Lo + // use-chain's merge point, i.e., after a concat vector provided it's safe + // to do so. + LLVM_DEBUG({ + dbgs() << "\tFollowing the Hi/LO pair :\n"; + dbgs() << "\t\tHI - "; + HI->dump(); + dbgs() << "\t\tLO - "; + LO->dump(); + }); + if (!analyzeHiLoUse(HI, LO, ShuffMask, NewLoc, CurShuffUses)) + return false; + for (auto &it : CurShuffUses) + ShuffUses[it.first] = it.second; + return true; + } else { // Single use case + Instruction *UI = nullptr; + if (!isValidUseInstr(UseList, UI)) + return false; + assert(UI && "Expected a valid use, but found none!!"); + + if (HI || LO) { + // If the single use case is either Hi or Lo, it is not safe to relocate + return false; + } + + LLVM_DEBUG(dbgs() << "\tChecking operands in 'use' : \n\t\t"; UI->dump()); + if (!areAllOperandsValid(I, UI, ShuffMask, CurShuffUses)) { + LLVM_DEBUG(dbgs() << "\t\tNOT SAFE -- Exiting!!\n"); + return false; + } + for (auto &it : CurShuffUses) + ShuffUses[it.first] = it.second; + NewLoc = UI; + // Keep looking for the new location until can't proceed any longer. + findNewShuffLoc(UI, ShuffMask, NewLoc); + } + return true; +} + +// Move shuffle instruction 'I' after 'NewLoc'. +bool HexagonOptShuffleVector::relocateShuffVec( + Instruction *I, ArrayRef<int> &M, Value *NewLoc, + std::list<Instruction *> &WorkList) { + // Remove original vector shuffles at the input operands. + // However, it can be done only if the replacements have the + // same number of vector elements as the original operands. + std::map<Instruction *, Value *> InstrMap; + bool CanReplace = true; + unsigned ShuffInstCount = ShuffUses.size(); + for (auto &it : ShuffUses) { + Instruction *J = it.first; + Visited.insert(J); + Value *ShuffleOP = nullptr; + match(J, (m_Shuffle(m_Value(ShuffleOP), m_Poison(), m_Mask(M)))); + VectorType *JTy = cast<FixedVectorType>(J->getType()); + VectorType *ShuffTy = cast<FixedVectorType>(ShuffleOP->getType()); + if (JTy->getElementCount() != ShuffTy->getElementCount()) + CanReplace = false; + + // Relocate shufflevector after a wider instruction only if there are + // at least two or more shufflevectors being relocated in order for the + // relocation to be profitable as otherwise it will require more shuffles. + VectorType *NewShuffTy = cast<FixedVectorType>(NewLoc->getType()); + if (ShuffInstCount == 1 && + NewShuffTy->getElementType() > ShuffTy->getElementType()) + CanReplace = false; + InstrMap[J] = ShuffleOP; + } + if (!CanReplace) { + LLVM_DEBUG(dbgs() << "\tRelocation FAILED!! \n"); + return false; + } + for (auto IM : InstrMap) { + Instruction *J = IM.first; + assert(ShuffUses.count(J)); + SmallVector<Instruction *, 2> Uses = ShuffUses[J]; + if (Uses.size() > 0) { + for (auto *U : Uses) + U->replaceUsesOfWith(IM.first, IM.second); + } else + // This is the shuffle we started with, and we have already made sure + // that it has either single use or a HI/LO use pair. So, it's okay + // to replace all its uses with the input to the shuffle instruction. 
+ IM.first->replaceAllUsesWith(IM.second); + } + // Shuffle the output of NewLoc based on the original mask. + Instruction *Pos = dyn_cast<Instruction>(NewLoc); + assert(Pos); + Pos = Pos->getNextNode(); + IRBuilder<> IRB(Pos); + Value *NewShuffV = + IRB.CreateShuffleVector(NewLoc, PoisonValue::get(NewLoc->getType()), M); + Instruction *NewInst = dyn_cast<Instruction>(NewShuffV); + if (!NewInst) { + LLVM_DEBUG(dbgs() << "\tRelocation FAILED!! \n"); + return false; + } + for (auto UI = NewLoc->user_begin(), UE = NewLoc->user_end(); UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + Instruction *J = dyn_cast<Instruction>(TheUse.getUser()); + if (J && TheUse.getUser() != NewShuffV) + J->replaceUsesOfWith(NewLoc, NewShuffV); + } + WorkList.push_back(NewInst); + LLVM_DEBUG(dbgs() << "\tRelocation Successful!! \n"); + LLVM_DEBUG(dbgs() << "\tAdded to Worklist :\n"; NewInst->dump()); + return true; +} + +bool HexagonOptShuffleVector::visitBlock(BasicBlock *B) { + bool Changed = false; + ArrayRef<int> M; + std::list<Instruction *> WorkList; + LLVM_DEBUG(dbgs() << "Preparing worklist for BB:\n"); + LLVM_DEBUG(B->dump()); + for (auto &I : *B) { + if (match(&I, (m_Shuffle(m_Value(), m_Value(), m_ZeroMask())))) + continue; // Skip - building vector from a scalar + if (match(&I, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M))))) { + WorkList.push_back(&I); + LLVM_DEBUG(dbgs() << "\tAdded instr - "; I.dump()); + } + } + + LLVM_DEBUG(dbgs() << "Processing worklist:\n"); + while (!WorkList.empty()) { +#ifndef NDEBUG + int Limit = ShuffVecLimit; + if (Limit >= 0) { + if (NumRelocated >= ShuffVecLimit) { + LLVM_DEBUG({ + dbgs() << "Reached maximum limit!! \n"; + dbgs() << "Can't process any more shuffles.... \n"; + }); + return Changed; + } + } +#endif + Instruction *I = WorkList.front(); + WorkList.pop_front(); + LLVM_DEBUG(dbgs() << "\tProcessing instr - "; I->dump()); + Value *NewLoc = nullptr; + + // 'ShuffUses' is used to keep track of the vector shuffles that need to + // be relocated along with their immediate uses that are known to satisfy + // all the safety requirements of the relocation. + // NOTE: The shuffle instr 'I', where the analysis starts, doesn't have + // its immediate uses set in 'ShuffUses'. This can be done but isn't + // necessary. At this point, only shuffles with a single use or a HI/LO pair + // are allowed. This is done mostly because those with multiple uses + // aren't expected to be very profitable and can be extended in the future + // if necessary. For now, all the uses in such cases can be safely updated + // when the corresponding vector shuffle is relocated. + + ShuffUses.clear(); + ShuffUses[I] = SmallVector<Instruction *, 2>(); + // Skip if node already visited. + if (!Visited.insert(I).second) { + LLVM_DEBUG(dbgs() << "\t\tSKIPPING - Already visited ...\n"); + continue; + } + if (!match(I, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M))))) { + LLVM_DEBUG(dbgs() << "\t\tSKIPPING - Not a vector shuffle ...\n"); + continue; + } + if (!findNewShuffLoc(I, M, NewLoc) || !NewLoc) { + LLVM_DEBUG(dbgs() << "\t\tSKIPPING - NewLoc not found ...\n"); + continue; + } + LLVM_DEBUG(dbgs() << "\t\tRelocating after -- "; NewLoc->dump()); + Changed |= relocateShuffVec(I, M, NewLoc, WorkList); +#ifndef NDEBUG + NumRelocated++; +#endif + } + return Changed; +} + +bool HexagonOptShuffleVector::runOnFunction(Function &F) { + HST = TM->getSubtargetImpl(F); + // Works only for 128B mode but can be extended for 64B if needed.
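The core rewrite performed by relocateShuffVec above amounts to dropping the identical shufflevectors that feed an element-wise operation and re-emitting a single shufflevector with the same mask immediately after that operation. A minimal standalone sketch of that re-emission step using the standard IRBuilder API follows; the helper name sinkShuffleAfter is illustrative and not from the patch:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Op plays the role of 'NewLoc': the element-wise instruction whose
    // operands previously went through identical shufflevectors with mask M.
    // Assumes Op is not the last instruction in its block.
    static Value *sinkShuffleAfter(Instruction *Op, ArrayRef<int> M) {
      IRBuilder<> IRB(Op->getNextNode());
      Value *NewShuff =
          IRB.CreateShuffleVector(Op, PoisonValue::get(Op->getType()), M);
      // Rewire every other user of Op to the relocated shuffle; the new
      // shuffle itself keeps Op as its input operand.
      Op->replaceUsesWithIf(NewShuff,
                            [&](Use &U) { return U.getUser() != NewShuff; });
      return NewShuff;
    }
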
+ if (skipFunction(F) || !HST->useHVX128BOps()) + return false; + + bool Changed = false; + for (auto &B : F) + Changed |= visitBlock(&B); + + return Changed; +} + +FunctionPass * +llvm::createHexagonOptShuffleVector(const HexagonTargetMachine &TM) { + return new HexagonOptShuffleVector(&TM); +} diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index e40dbd2..e84070f 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -391,7 +391,6 @@ def Fptoui: pf1<fp_to_uint>; def Sitofp: pf1<sint_to_fp>; def Uitofp: pf1<uint_to_fp>; - // --(1) Immediate ------------------------------------------------------- // @@ -474,6 +473,18 @@ def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>; def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>; def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>; +def: Pat<(i32 (fp_to_bf16 F32:$v)), + (C2_mux (F2_sfclass F32:$v, 0x10), (A2_tfrsi(i32 0x7fff)), + (C2_mux + (C2_cmpeq + (A2_and F32:$v, (A2_tfrsi (i32 0x1FFFF))), + (A2_tfrsi (i32 0x08000))), + (A2_and (A2_asrh F32:$v), (A2_tfrsi (i32 65535))), + (A2_and + (A2_asrh + (A2_add F32:$v, (A2_and F32:$v, (A2_tfrsi (i32 0x8000))))), + (A2_tfrsi (i32 65535)))) + )>; // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index d19920c..674d191 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -15,12 +15,14 @@ def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; def HVF16: PatLeaf<(VecF16 HvxVR:$R)>; def HVF32: PatLeaf<(VecF32 HvxVR:$R)>; +def HVBF16: PatLeaf<(VecBF16 HvxVR:$R)>; def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>; def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>; +def HWBF16: PatLeaf<(VecBF16 HvxWR:$R)>; def SDTVecUnaryOp: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; @@ -182,12 +184,15 @@ let Predicates = [UseHVX] in { } let Predicates = [UseHVXV68] in { - defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>; - defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>; - defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>; - defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>; - defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>; - defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>; + defm : HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecBF16, IsVecOff>; + defm : HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>; + defm : HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>; + defm : HvxLda_pat<V6_vL32b_ai, alignedload, VecBF16, IsVecOff>; + defm : HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>; + defm : HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>; + defm : HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecBF16, IsVecOff>; + defm : HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>; + defm : HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>; } // HVX stores @@ -233,10 +238,13 @@ let Predicates = [UseHVX] in { } let Predicates = [UseHVXV68] in { + defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVBF16, IsVecOff>; defm: 
HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>; defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>; + defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVBF16, IsVecOff>; defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF16, IsVecOff>; defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF32, IsVecOff>; + defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVBF16, IsVecOff>; defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF16, IsVecOff>; defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF32, IsVecOff>; } @@ -253,20 +261,36 @@ let Predicates = [UseHVX] in { defm: NopCast_pat<VecPI16, VecPI32, HvxWR>; } +let Predicates = [UseHVXV68] in { + defm: NopCast_pat<VecI8, VecF16, HvxVR>; + defm: NopCast_pat<VecI16, VecF16, HvxVR>; + defm: NopCast_pat<VecI32, VecF16, HvxVR>; + defm: NopCast_pat<VecF32, VecF16, HvxVR>; + defm: NopCast_pat<VecPI8, VecPF32, HvxWR>; + defm: NopCast_pat<VecPI16, VecPF32, HvxWR>; + defm: NopCast_pat<VecPI32, VecPF32, HvxWR>; +} + let Predicates = [UseHVX, UseHVXFloatingPoint] in { defm: NopCast_pat<VecI8, VecF16, HvxVR>; + defm: NopCast_pat<VecI8, VecBF16, HvxVR>; defm: NopCast_pat<VecI8, VecF32, HvxVR>; defm: NopCast_pat<VecI16, VecF16, HvxVR>; + defm: NopCast_pat<VecI16, VecBF16, HvxVR>; defm: NopCast_pat<VecI16, VecF32, HvxVR>; defm: NopCast_pat<VecI32, VecF16, HvxVR>; + defm: NopCast_pat<VecI32, VecBF16, HvxVR>; defm: NopCast_pat<VecI32, VecF32, HvxVR>; defm: NopCast_pat<VecF16, VecF32, HvxVR>; defm: NopCast_pat<VecPI8, VecPF16, HvxWR>; + defm: NopCast_pat<VecPI8, VecPBF16, HvxWR>; defm: NopCast_pat<VecPI8, VecPF32, HvxWR>; defm: NopCast_pat<VecPI16, VecPF16, HvxWR>; + defm: NopCast_pat<VecPI16, VecPBF16, HvxWR>; defm: NopCast_pat<VecPI16, VecPF32, HvxWR>; defm: NopCast_pat<VecPI32, VecPF16, HvxWR>; + defm: NopCast_pat<VecPI32, VecPBF16, HvxWR>; defm: NopCast_pat<VecPI32, VecPF32, HvxWR>; defm: NopCast_pat<VecPF16, VecPF32, HvxWR>; } @@ -293,6 +317,8 @@ let Predicates = [UseHVX] in { (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>; def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>; @@ -315,11 +341,14 @@ let Predicates = [UseHVX] in { let Predicates = [UseHVX, UseHVXFloatingPoint] in { let AddedComplexity = 100 in { def: Pat<(VecF16 vzero), (V6_vd0)>; + def: Pat<(VecBF16 vzero), (V6_vd0)>; def: Pat<(VecF32 vzero), (V6_vd0)>; def: Pat<(VecPF16 vzero), (PS_vdd0)>; + def: Pat<(VecPBF16 vzero), (PS_vdd0)>; def: Pat<(VecPF32 vzero), (PS_vdd0)>; def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>; + def : Pat<(concat_vectors (VecBF16 vzero), (VecBF16 vzero)), (PS_vdd0)>; def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>; } @@ -355,11 +384,13 @@ let Predicates = [UseHVX] in { let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { let AddedComplexity = 30 in { def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (PS_vsplatih imm:$V)>; + def: Pat<(VecBF16 (splat_vector u16_0ImmPred:$V)), (PS_vsplatih imm:$V)>; def: Pat<(VecF32 (splat_vector anyint:$V)), (PS_vsplatiw imm:$V)>; def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (PS_vsplatiw (ftoi $V))>; } let AddedComplexity = 20 in { def: Pat<(VecF16 (splat_vector I32:$Rs)), (PS_vsplatrh $Rs)>; + def: Pat<(VecBF16 (splat_vector I32:$Rs)), (PS_vsplatrh $Rs)>; def: Pat<(VecF32 (splat_vector I32:$Rs)), (PS_vsplatrw $Rs)>; def: 
Pat<(VecF32 (splat_vector F32:$Rs)), (PS_vsplatrw $Rs)>; } @@ -519,6 +550,35 @@ let Predicates = [UseHVXV68, UseHVXIEEEFP] in { def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>; } +let Predicates = [UseHVXV81] in { + def : Pat<(VecBF16 (pf1<fpround> HWF32:$Vuu)), + (V6_vpackwuh_sat (V6_vmux + (V6_veqsf (HiVec HvxWR:$Vuu), (HiVec HvxWR:$Vuu)), + (V6_vlsrw (V6_vmux (V6_veqw (V6_vand (HiVec HvxWR:$Vuu), + (PS_vsplatiw (i32 0x1FFFF))), + (PS_vsplatiw (i32 0x08000))), + (HiVec HvxWR:$Vuu), + (V6_vaddw (HiVec HvxWR:$Vuu), + (V6_vand (HiVec HvxWR:$Vuu), + (PS_vsplatiw (i32 0x8000))))), + (A2_tfrsi 16)), + (PS_vsplatih (i32 0x7fff))), + (V6_vmux (V6_veqsf (LoVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)), + (V6_vlsrw (V6_vmux (V6_veqw (V6_vand (LoVec HvxWR:$Vuu), + (PS_vsplatiw (i32 0x1FFFF))), + (PS_vsplatiw (i32 0x08000))), + (LoVec HvxWR:$Vuu), + (V6_vaddw (LoVec HvxWR:$Vuu), + (V6_vand (LoVec HvxWR:$Vuu), + (PS_vsplatiw (i32 0x8000))))), + (A2_tfrsi 16)), + (PS_vsplatih (i32 0x7fff))))>; +} + +let Predicates = [UseHVXV73, UseHVXQFloat] in { + def : Pat<(VecF32 (Sitofp HVI32:$Vu)), (V6_vconv_sf_w HvxVR:$Vu)>; +} + let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; @@ -531,6 +591,13 @@ let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } +let Predicates = [UseHVXV81, UseHVXFloatingPoint] in { + def : Pat<(vselect HQ16:$Qu, HVBF16:$Vs, HVBF16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def : Pat<(vselect (qnot HQ16:$Qu), HVBF16:$Vs, HVBF16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; +} + let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in { let AddedComplexity = 220 in { defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setgt, VecQ16, HVF16>; diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index f29a739..c9cb449 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -58,7 +58,7 @@ // are PHI inst. 
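The scalar fp_to_bf16 pattern added to HexagonPatterns.td and the HVX fpround-to-VecBF16 pattern above lower f32 to bf16 with round-to-nearest-even: lanes flagged by the class check (NaN, selected via V6_veqsf in the HVX pattern) become 0x7fff, an exact tie whose kept low bit is already even is simply truncated, and every other value is rounded by adding bit 15 before taking the upper 16 bits. A scalar C++ sketch of that computation (illustrative only; floatToBF16 is not a function from the patch):

    #include <cstdint>
    #include <cstring>

    static uint16_t floatToBF16(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));   // bitcast f32 -> i32
      // NaN inputs (the sfclass/veqsf check in the patterns) map to 0x7fff.
      if ((Bits & 0x7f800000u) == 0x7f800000u && (Bits & 0x007fffffu) != 0)
        return 0x7fff;
      // Exact tie with an even kept LSB: truncate.
      if ((Bits & 0x1ffffu) == 0x08000u)
        return uint16_t(Bits >> 16);
      // Otherwise add the round bit (bit 15), let the carry propagate, and
      // keep the upper 16 bits.
      Bits += Bits & 0x8000u;
      return uint16_t(Bits >> 16);
    }
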
// //===----------------------------------------------------------------------===// -#include <unordered_set> + #define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" #include "Hexagon.h" @@ -77,7 +77,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <map> -#include <vector> #define DEBUG_TYPE "hexagon-qfp-optimizer" @@ -86,6 +85,9 @@ using namespace llvm; cl::opt<bool> DisableQFOptimizer("disable-qfp-opt", cl::init(false), cl::desc("Disable optimization of Qfloat operations.")); +cl::opt<bool> DisableQFOptForMul( + "disable-qfp-opt-mul", cl::init(true), + cl::desc("Disable optimization of Qfloat operations for multiply.")); namespace { const std::map<unsigned short, unsigned short> QFPInstMap{ @@ -101,11 +103,16 @@ const std::map<unsigned short, unsigned short> QFPInstMap{ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, - {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}, + {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32}, + {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16}, + {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32}, + {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16}, + {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32}, + {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}}; } // namespace namespace { - struct HexagonQFPOptimizer : public MachineFunctionPass { public: static char ID; @@ -116,6 +123,10 @@ public: bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB); + + bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB); + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -142,19 +153,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() { bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB) { - // Early exit: - // - if instruction is invalid or has too few operands (QFP ops need 2 sources - // + 1 dest), - // - or does not have a transformation mapping. 
- if (MI->getNumOperands() < 3) + if (MI->getNumOperands() == 2) + return optimizeQfpOneOp(MI, MBB); + else if (MI->getNumOperands() == 3) + return optimizeQfpTwoOp(MI, MBB); + else return false; +} + +bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI, + MachineBasicBlock *MBB) { + + unsigned Op0F = 0; auto It = QFPInstMap.find(MI->getOpcode()); if (It == QFPInstMap.end()) return false; + unsigned short InstTy = It->second; + // Get the reaching defs of MI + MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump()); + MachineInstr *ReachDefDef = nullptr; + + // Get the reaching def of the reaching def to check for W reg def + if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() && + DefMI->getOperand(1).getReg().isVirtual()) + ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg()); + unsigned ReachDefOp = DefMI->getOpcode(); + MachineInstrBuilder MIB; + + // Check if the reaching def is a conversion + if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 || + ReachDefOp == Hexagon::V6_vconv_hf_qf16) { + + // Return if the reaching def of the reaching def is W type + if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + // Analyze the use operands of the conversion to get their KILL status + MachineOperand &SrcOp = DefMI->getOperand(1); + Op0F = getKillRegState(SrcOp.isKill()); + SrcOp.setIsKill(false); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + } + return false; +} + +bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI, + MachineBasicBlock *MBB) { unsigned Op0F = 0; unsigned Op1F = 0; + auto It = QFPInstMap.find(MI->getOpcode()); + if (It == QFPInstMap.end()) + return false; + unsigned short InstTy = It->second; // Get the reaching defs of MI, DefMI1 and DefMI2 MachineInstr *DefMI1 = nullptr; MachineInstr *DefMI2 = nullptr; @@ -167,6 +228,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, return false; MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + MachineInstr *Inst1 = nullptr; MachineInstr *Inst2 = nullptr; LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); @@ -185,7 +249,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, unsigned Def2OP = DefMI2->getOpcode(); MachineInstrBuilder MIB; - // Case 1: Both reaching defs of MI are qf to sf/hf conversions + + // Check if both the reaching defs of MI are qf to sf/hf conversions if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && @@ -226,7 +291,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if left operand's reaching def is a conversion to sf/hf } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP != Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && @@ -250,7 +315,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if right operand's reaching def is a conversion to
sf/hf } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP != Hexagon::V6_vconv_hf_qf16 && @@ -258,13 +323,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, !DefMI1->isPHI() && (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { // The second operand of original instruction is converted. - // In "mix" instructions, "qf" operand is always the first operand. - - // Caveat: vsub is not commutative w.r.t operands. - if (InstTy == Hexagon::V6_vsub_qf16_mix || - InstTy == Hexagon::V6_vsub_qf32_mix) - return false; - if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == &Hexagon::HvxWRRegClass) return false; @@ -275,10 +333,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, Op1F = getKillRegState(Src2.isKill()); Src2.setIsKill(false); Op0F = getKillRegState(Src1.isKill()); - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) - .addReg(Src2.getReg(), Op1F, - Src2.getSubReg()) // Notice the operands are flipped. - .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + if (InstTy == Hexagon::V6_vsub_qf16_mix || + InstTy == Hexagon::V6_vsub_qf32_mix) { + if (!HST->useHVXV81Ops()) + // vsub_(hf|sf)_mix instructions are only available on HVX V81+ + return false; + // vsub is not commutative w.r.t. operands -> treat it as a special case + // to choose the correct mix instruction. + if (Def2OP == Hexagon::V6_vconv_sf_qf32) + InstTy = Hexagon::V6_vsub_sf_mix; + else if (Def2OP == Hexagon::V6_vconv_hf_qf16) + InstTy = Hexagon::V6_vsub_hf_mix; + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + } else { + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src2.getReg(), Op1F, + Src2.getSubReg()) // Notice the operands are flipped. + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + } LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; } @@ -309,15 +383,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { while (MII != MBBI->instr_end()) { MachineInstr *MI = &*MII; ++MII; // As MI might be removed.
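Both optimizeQfpOneOp and optimizeQfpTwoOp above drive their rewrites off the SSA reaching definition of each virtual-register operand, queried through MachineRegisterInfo. A minimal sketch of that query follows; the helper name definedByQfConversion is illustrative, and the Hexagon::V6_vconv_* opcodes are assumed to come from the target's generated instruction enum:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Returns true if the operand is a virtual register whose defining
    // instruction is one of the qf-to-IEEE conversions the pass folds away.
    static bool definedByQfConversion(const MachineRegisterInfo &MRI,
                                      const MachineOperand &MO) {
      if (!MO.isReg() || !MO.getReg().isVirtual())
        return false;
      const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
      return Def && (Def->getOpcode() == Hexagon::V6_vconv_sf_qf32 ||
                     Def->getOpcode() == Hexagon::V6_vconv_hf_qf16);
    }
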
- - if (QFPInstMap.count(MI->getOpcode()) && - MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && - MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { - LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); - if (optimizeQfp(MI, MBB)) { - MI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "\t....Removing...."); - Changed = true; + if (QFPInstMap.count(MI->getOpcode())) { + auto OpC = MI->getOpcode(); + if (DisableQFOptForMul && HII->isQFPMul(MI)) + continue; + if (OpC != Hexagon::V6_vconv_sf_qf32 && + OpC != Hexagon::V6_vconv_hf_qf16) { + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); + if (optimizeQfp(MI, MBB)) { + MI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "\t....Removing...."); + Changed = true; + } } } } diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp index 54f5608..f375b25 100644 --- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <limits> -#include <utility> using namespace llvm; using namespace rdf; diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td index 3a77fcd..1f1aebd 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -15,141 +15,126 @@ let Namespace = "Hexagon" in { class HexagonReg<bits<5> num, string n, list<string> alt = [], list<Register> alias = []> : Register<n, alt> { let Aliases = alias; - let HWEncoding{4-0} = num; + let HWEncoding{4 -0} = num; } // These registers are used to preserve a distinction between // vector register pairs of differing order. - class HexagonFakeReg<string n> : Register<n> { - let isArtificial = 1; - } + class HexagonFakeReg<string n> : Register<n> { let isArtificial = 1; } class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs, - list<string> alt = []> : - RegisterWithSubRegs<n, subregs> { + list<string> alt = []> + : RegisterWithSubRegs<n, subregs> { let AltNames = alt; - let HWEncoding{4-0} = num; + let HWEncoding{4 -0} = num; } class HexagonSys<bits<7> num, string n, list<string> alt = [], list<Register> alias = []> : Register<n, alt> { let Aliases = alias; - let HWEncoding{6-0} = num; + let HWEncoding{6 -0} = num; } class HexagonDoubleSys<bits<7> num, string n, list<Register> subregs, - list<string> alt = []> : - RegisterWithSubRegs<n, subregs> { + list<string> alt = []> + : RegisterWithSubRegs<n, subregs> { let AltNames = alt; - let HWEncoding{6-0} = num; + let HWEncoding{6 -0} = num; } // Registers are identified with 5-bit ID numbers. // Ri - 32-bit integer registers. - class Ri<bits<5> num, string n, list<string> alt = []> : - HexagonReg<num, n, alt>; + class Ri<bits<5> num, string n, list<string> alt = []> + : HexagonReg<num, n, alt>; // Rp - false/pseudo registers. These registers are used // to provide a distinct set of aliases for both styles of vector // register pairs without encountering subregister indexing constraints. - class R_fake<string n> : - HexagonFakeReg<n>; - + class R_fake<string n> : HexagonFakeReg<n>; // Rf - 32-bit floating-point registers. class Rf<bits<5> num, string n> : HexagonReg<num, n>; // Rd - 64-bit registers. 
- class Rd<bits<5> num, string n, list<Register> subregs, - list<string> alt = []> : - HexagonDoubleReg<num, n, subregs, alt> { + class Rd<bits<5> num, string n, list<Register> subregs, list<string> alt = []> + : HexagonDoubleReg<num, n, subregs, alt> { let SubRegs = subregs; } // Rp - predicate registers class Rp<bits<5> num, string n> : HexagonReg<num, n>; - // Rq - vector predicate registers class Rq<bits<3> num, string n> : Register<n, []> { - let HWEncoding{2-0} = num; + let HWEncoding{2 -0} = num; } // Rc - control registers - class Rc<bits<5> num, string n, - list<string> alt = [], list<Register> alias = []> : - HexagonReg<num, n, alt, alias>; + class Rc<bits<5> num, string n, list<string> alt = [], + list<Register> alias = []> : HexagonReg<num, n, alt, alias>; // Rcc - 64-bit control registers. class Rcc<bits<5> num, string n, list<Register> subregs, - list<string> alt = []> : - HexagonDoubleReg<num, n, subregs, alt> { + list<string> alt = []> : HexagonDoubleReg<num, n, subregs, alt> { let SubRegs = subregs; } // Rs - system registers - class Rs<bits<7> num, string n, - list<string> alt = [], list<Register> alias = []> : - HexagonSys<num, n, alt, alias>; + class Rs<bits<7> num, string n, list<string> alt = [], + list<Register> alias = []> : HexagonSys<num, n, alt, alias>; // Rss - 64-bit system registers. class Rss<bits<7> num, string n, list<Register> subregs, - list<string> alt = []> : - HexagonDoubleSys<num, n, subregs, alt> { + list<string> alt = []> : HexagonDoubleSys<num, n, subregs, alt> { let SubRegs = subregs; } // Mx - address modifier registers - class Mx<bits<1> num, string n> : Register<n, []> { - let HWEncoding{0} = num; - } + class Mx<bits<1> num, string n> : Register<n, []> { let HWEncoding{0} = num; } // Rg - Guest/Hypervisor registers - class Rg<bits<5> num, string n, - list<string> alt = [], list<Register> alias = []> : - HexagonReg<num, n, alt, alias>; + class Rg<bits<5> num, string n, list<string> alt = [], + list<Register> alias = []> : HexagonReg<num, n, alt, alias>; // Rgg - 64-bit Guest/Hypervisor registers - class Rgg<bits<5> num, string n, list<Register> subregs> : - HexagonDoubleReg<num, n, subregs> { + class Rgg<bits<5> num, string n, list<Register> subregs> + : HexagonDoubleReg<num, n, subregs> { let SubRegs = subregs; } - def isub_lo : SubRegIndex<32>; - def isub_hi : SubRegIndex<32, 32>; - def vsub_lo : SubRegIndex<-1, -1>; - def vsub_hi : SubRegIndex<-1, -1>; - def vsub_fake: SubRegIndex<-1, -1>; - def wsub_lo : SubRegIndex<-1, -1>; - def wsub_hi : SubRegIndex<-1, -1>; + def isub_lo : SubRegIndex<32>; + def isub_hi : SubRegIndex<32, 32>; + def vsub_lo : SubRegIndex<-1, -1>; + def vsub_hi : SubRegIndex<-1, -1>; + def vsub_fake : SubRegIndex<-1, -1>; + def wsub_lo : SubRegIndex<-1, -1>; + def wsub_hi : SubRegIndex<-1, -1>; def subreg_overflow : SubRegIndex<1, 0>; // Integer registers. - foreach i = 0-28 in { - def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>; - } + foreach i = 0 -28 in { def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>; } def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>; def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>; def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>; // Aliases of the R* registers used to hold 64-bit int values (doubles). 
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { - def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; - def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; - def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; - def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; - def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; - def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; - def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; - def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; - def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; - def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; - def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; - def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; - def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; - def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; - def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; - def D15 : Rd<30, "r31:30", [R30, R31], ["lr:fp"]>, DwarfRegNum<[62]>; + def D0 : Rd<0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd<2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd<4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd<6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd<8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; + def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; + def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; + def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; + def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; + def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; + def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; + def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; + def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; + def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; + def D15 : Rd<30, "r31:30", [R30, R31], ["lr:fp"]>, DwarfRegNum<[62]>; } // Predicate registers. @@ -164,119 +149,118 @@ let Namespace = "Hexagon" in { // on the entire USR. def USR_OVF : Rc<?, "usr.ovf">; - def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { let SubRegIndices = [subreg_overflow]; let SubRegs = [USR_OVF]; } // Control registers. - def SA0: Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>; - def LC0: Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>; - def SA1: Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>; - def LC1: Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; - def P3_0: Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, - DwarfRegNum<[71]>; + def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>; + def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>; + def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>; + def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; + def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, DwarfRegNum<[71]>; // When defining more Cn registers, make sure to explicitly mark them // as reserved in HexagonRegisterInfo.cpp. - def C5: Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; - def M0: Rc<6, "m0", ["c6"]>, DwarfRegNum<[73]>; - def M1: Rc<7, "m1", ["c7"]>, DwarfRegNum<[74]>; + def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; + def M0 : Rc<6, "m0", ["c6"]>, DwarfRegNum<[73]>; + def M1 : Rc<7, "m1", ["c7"]>, DwarfRegNum<[74]>; // Define C8 separately and make it aliased with USR. // The problem is that USR has subregisters (e.g. overflow). 
If USR was // specified as a subregister of C9_8, it would imply that subreg_overflow // and isub_lo can be composed, which leads to all kinds of issues // with lane masks. - def C8: Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>; - def PC: Rc<9, "pc", ["c9"]>, DwarfRegNum<[76]>; - def UGP: Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; - def GP: Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>; - def CS0: Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; - def CS1: Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; - def UPCYCLELO: Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; - def UPCYCLEHI: Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; - def FRAMELIMIT: Rc<16, "framelimit", ["c16"]>, DwarfRegNum<[83]>; - def FRAMEKEY: Rc<17, "framekey", ["c17"]>, DwarfRegNum<[84]>; - def PKTCOUNTLO: Rc<18, "pktcountlo", ["c18"]>, DwarfRegNum<[85]>; - def PKTCOUNTHI: Rc<19, "pktcounthi", ["c19"]>, DwarfRegNum<[86]>; - def UTIMERLO: Rc<30, "utimerlo", ["c30"]>, DwarfRegNum<[97]>; - def UTIMERHI: Rc<31, "utimerhi", ["c31"]>, DwarfRegNum<[98]>; + def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>; + def PC : Rc<9, "pc", ["c9"]>, DwarfRegNum<[76]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; + def GP : Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; + def UPCYCLELO : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; + def UPCYCLEHI : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; + def FRAMELIMIT : Rc<16, "framelimit", ["c16"]>, DwarfRegNum<[83]>; + def FRAMEKEY : Rc<17, "framekey", ["c17"]>, DwarfRegNum<[84]>; + def PKTCOUNTLO : Rc<18, "pktcountlo", ["c18"]>, DwarfRegNum<[85]>; + def PKTCOUNTHI : Rc<19, "pktcounthi", ["c19"]>, DwarfRegNum<[86]>; + def UTIMERLO : Rc<30, "utimerlo", ["c30"]>, DwarfRegNum<[97]>; + def UTIMERHI : Rc<31, "utimerhi", ["c31"]>, DwarfRegNum<[98]>; // Control registers pairs. let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { - def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; - def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; - def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>; - def C7_6 : Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>; + def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; + def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; + def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>; + def C7_6 : Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>; // Use C8 instead of USR as a subregister of C9_8. 
- def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; - def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; - def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; - def UPCYCLE: Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI], ["upcycle"]>, - DwarfRegNum<[80]>; - def C17_16 : Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>; + def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; + def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; + def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; + def UPCYCLE : Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI], ["upcycle"]>, + DwarfRegNum<[80]>; + def C17_16 : Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>; def PKTCOUNT : Rcc<18, "c19:18", [PKTCOUNTLO, PKTCOUNTHI], ["pktcount"]>, - DwarfRegNum<[85]>; - def UTIMER : Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>, - DwarfRegNum<[97]>; + DwarfRegNum<[85]>; + def UTIMER : Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>, + DwarfRegNum<[97]>; } - foreach i = 0-31 in { - def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>; - def VF#i : R_fake<"__"#!add(i,999999)>, DwarfRegNum<[!add(i, 999999)]>; - def VFR#i : R_fake<"__"#!add(i,9999999)>, DwarfRegNum<[!add(i, 9999999)]>; + foreach i = 0 -31 in { + def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>; + def VF#i : R_fake<"__"#!add(i, 999999)>, DwarfRegNum<[!add(i, 999999)]>; + def VFR#i : R_fake<"__"#!add(i, 9999999)>, DwarfRegNum<[!add(i, 9999999)]>; } def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>; // Aliases of the V* registers used to hold double vec values. let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in { - def W0 : Rd< 0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>; - def W1 : Rd< 2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>; - def W2 : Rd< 4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>; - def W3 : Rd< 6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>; - def W4 : Rd< 8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>; - def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>; - def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>; - def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>; - def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>; - def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>; - def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>; - def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>; - def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>; - def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>; - def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>; - def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>; + def W0 : Rd<0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>; + def W1 : Rd<2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>; + def W2 : Rd<4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>; + def W3 : Rd<6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>; + def W4 : Rd<8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, 
DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>; } // Reverse Aliases of the V* registers used to hold double vec values. let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in { - def WR0 : Rd< 1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>; - def WR1 : Rd< 3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>; - def WR2 : Rd< 5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>; - def WR3 : Rd< 7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>; - def WR4 : Rd< 9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>; - def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>; - def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>; - def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>; - def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>; - def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>; - def WR10: Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>; - def WR11: Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>; - def WR12: Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>; - def WR13: Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>; - def WR14: Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>; - def WR15: Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>; + def WR0 : Rd<1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>; + def WR1 : Rd<3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>; + def WR2 : Rd<5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>; + def WR3 : Rd<7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>; + def WR4 : Rd<9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>; + def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>; + def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>; + def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>; + def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>; + def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>; + def WR10 : Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>; + def WR11 : Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>; + def WR12 : Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>; + def WR13 : Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>; + def WR14 : Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>; + def WR15 : Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>; } // Aliases of the V* registers used to hold quad vec values. 
let SubRegIndices = [wsub_lo, wsub_hi], CoveredBySubRegs = 1 in { - def VQ0 : Rd< 0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>; - def VQ1 : Rd< 4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>; - def VQ2 : Rd< 8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>; - def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>; - def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>; - def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>; - def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>; - def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>; + def VQ0 : Rd<0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>; + def VQ1 : Rd<4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>; + def VQ2 : Rd<8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>; + def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>; + def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>; + def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>; + def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>; + def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>; } // Vector Predicate registers. @@ -286,359 +270,357 @@ let Namespace = "Hexagon" in { def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; // System registers. - def SGP0 : Rs<0, "sgp0", ["s0"]>, DwarfRegNum<[144]>; - def SGP1 : Rs<1, "sgp1", ["s1"]>, DwarfRegNum<[145]>; - def STID : Rs<2, "stid", ["s2"]>, DwarfRegNum<[146]>; - def ELR : Rs<3, "elr", ["s3"]>, DwarfRegNum<[147]>; - def BADVA0 : Rs<4, "badva0", ["s4"]>, DwarfRegNum<[148]>; - def BADVA1 : Rs<5, "badva1", ["s5"]>, DwarfRegNum<[149]>; - def SSR : Rs<6, "ssr", ["s6"]>, DwarfRegNum<[150]>; - def CCR : Rs<7, "ccr", ["s7"]>, DwarfRegNum<[151]>; - def HTID : Rs<8, "htid", ["s8"]>, DwarfRegNum<[152]>; - def BADVA : Rs<9, "badva", ["s9"]>, DwarfRegNum<[153]>; - def IMASK : Rs<10, "imask", ["s10"]>, DwarfRegNum<[154]>; - def S11 : Rs<11, "s11">, DwarfRegNum<[155]>; - def S12 : Rs<12, "s12">, DwarfRegNum<[156]>; - def S13 : Rs<13, "s13">, DwarfRegNum<[157]>; - def S14 : Rs<14, "s14">, DwarfRegNum<[158]>; - def S15 : Rs<15, "s15">, DwarfRegNum<[159]>; - def EVB : Rs<16, "evb", ["s16"]>, DwarfRegNum<[160]>; - def MODECTL : Rs<17, "modectl", ["s17"]>, DwarfRegNum<[161]>; - def SYSCFG : Rs<18, "syscfg", ["s18"]>, DwarfRegNum<[162]>; - def S19 : Rs<19, "s19", ["s19"]>, DwarfRegNum<[163]>; - def S20 : Rs<20, "s20", ["s20"]>, DwarfRegNum<[164]>; - def VID : Rs<21, "vid", ["s21"]>, DwarfRegNum<[165]>; - def S22 : Rs<22, "s22", ["s22"]>, DwarfRegNum<[166]>; - def S23 : Rs<23, "s23">, DwarfRegNum<[167]>; - def S24 : Rs<24, "s24">, DwarfRegNum<[168]>; - def S25 : Rs<25, "s25">, DwarfRegNum<[169]>; - def S26 : Rs<26, "s26">, DwarfRegNum<[170]>; - def CFGBASE : Rs<27, "cfgbase", ["s27"]>, DwarfRegNum<[171]>; - def DIAG : Rs<28, "diag", ["s28"]>, DwarfRegNum<[172]>; - def REV : Rs<29, "rev", ["s29"]>, DwarfRegNum<[173]>; - def PCYCLELO : Rs<30, "pcyclelo", ["s30"]>, DwarfRegNum<[174]>; - def PCYCLEHI : Rs<31, "pcyclehi", ["s31"]>, DwarfRegNum<[175]>; - def ISDBST : Rs<32, "isdbst", ["s32"]>, DwarfRegNum<[176]>; - def ISDBCFG0 : Rs<33, "isdbcfg0", ["s33"]>, DwarfRegNum<[177]>; - def ISDBCFG1 : Rs<34, "isdbcfg1", ["s34"]>, DwarfRegNum<[178]>; - def S35 : Rs<35, "s35">, DwarfRegNum<[179]>; - def BRKPTPC0 : Rs<36, "brkptpc0", ["s36"]>, DwarfRegNum<[180]>; - def BRKPTCFG0: Rs<37, "brkptcfg0", ["s37"]>, DwarfRegNum<[181]>; - def BRKPTPC1 : Rs<38, "brkptpc1", ["s38"]>, DwarfRegNum<[182]>; - def BRKPTCFG1: Rs<39, "brkptcfg1", ["s39"]>, DwarfRegNum<[183]>; - def ISDBMBXIN: Rs<40, "isdbmbxin", ["s40"]>, DwarfRegNum<[184]>; - def ISDBMBXOUT: Rs<41, "isdbmbxout", 
["s41"]>, DwarfRegNum<[185]>; - def ISDBEN: Rs<42, "isdben", ["s42"]>, DwarfRegNum<[186]>; - def ISDBGPR: Rs<43, "isdbgpr", ["s43"]>, DwarfRegNum<[187]>; - def S44: Rs<44, "s44">, DwarfRegNum<[188]>; - def S45: Rs<45, "s45">, DwarfRegNum<[189]>; - def S46: Rs<46, "s46">, DwarfRegNum<[190]>; - def S47: Rs<47, "s47">, DwarfRegNum<[191]>; - def PMUCNT0: Rs<48, "pmucnt0", ["s48"]>, DwarfRegNum<[192]>; - def PMUCNT1: Rs<49, "pmucnt1", ["s49"]>, DwarfRegNum<[193]>; - def PMUCNT2: Rs<50, "pmucnt2", ["s50"]>, DwarfRegNum<[194]>; - def PMUCNT3: Rs<51, "pmucnt3", ["s51"]>, DwarfRegNum<[195]>; - def PMUEVTCFG: Rs<52, "pmuevtcfg", ["s52"]>, DwarfRegNum<[196]>; - def PMUCFG: Rs<53, "pmucfg", ["s53"]>, DwarfRegNum<[197]>; - def S54: Rs<54, "s54">, DwarfRegNum<[198]>; - def S55: Rs<55, "s55">, DwarfRegNum<[199]>; - def S56: Rs<56, "s56">, DwarfRegNum<[200]>; - def S57: Rs<57, "s57">, DwarfRegNum<[201]>; - def S58: Rs<58, "s58">, DwarfRegNum<[202]>; - def S59: Rs<59, "s59">, DwarfRegNum<[203]>; - def S60: Rs<60, "s60">, DwarfRegNum<[204]>; - def S61: Rs<61, "s61">, DwarfRegNum<[205]>; - def S62: Rs<62, "s62">, DwarfRegNum<[206]>; - def S63: Rs<63, "s63">, DwarfRegNum<[207]>; - def S64: Rs<64, "s64">, DwarfRegNum<[208]>; - def S65: Rs<65, "s65">, DwarfRegNum<[209]>; - def S66: Rs<66, "s66">, DwarfRegNum<[210]>; - def S67: Rs<67, "s67">, DwarfRegNum<[211]>; - def S68: Rs<68, "s68">, DwarfRegNum<[212]>; - def S69: Rs<69, "s69">, DwarfRegNum<[213]>; - def S70: Rs<70, "s70">, DwarfRegNum<[214]>; - def S71: Rs<71, "s71">, DwarfRegNum<[215]>; - def S72: Rs<72, "s72">, DwarfRegNum<[216]>; - def S73: Rs<73, "s73">, DwarfRegNum<[217]>; - def S74: Rs<74, "s74">, DwarfRegNum<[218]>; - def S75: Rs<75, "s75">, DwarfRegNum<[219]>; - def S76: Rs<76, "s76">, DwarfRegNum<[220]>; - def S77: Rs<77, "s77">, DwarfRegNum<[221]>; - def S78: Rs<78, "s78">, DwarfRegNum<[222]>; - def S79: Rs<79, "s79">, DwarfRegNum<[223]>; - def S80: Rs<80, "s80">, DwarfRegNum<[224]>; + def SGP0 : Rs<0, "sgp0", ["s0"]>, DwarfRegNum<[144]>; + def SGP1 : Rs<1, "sgp1", ["s1"]>, DwarfRegNum<[145]>; + def STID : Rs<2, "stid", ["s2"]>, DwarfRegNum<[146]>; + def ELR : Rs<3, "elr", ["s3"]>, DwarfRegNum<[147]>; + def BADVA0 : Rs<4, "badva0", ["s4"]>, DwarfRegNum<[148]>; + def BADVA1 : Rs<5, "badva1", ["s5"]>, DwarfRegNum<[149]>; + def SSR : Rs<6, "ssr", ["s6"]>, DwarfRegNum<[150]>; + def CCR : Rs<7, "ccr", ["s7"]>, DwarfRegNum<[151]>; + def HTID : Rs<8, "htid", ["s8"]>, DwarfRegNum<[152]>; + def BADVA : Rs<9, "badva", ["s9"]>, DwarfRegNum<[153]>; + def IMASK : Rs<10, "imask", ["s10"]>, DwarfRegNum<[154]>; + def S11 : Rs<11, "s11">, DwarfRegNum<[155]>; + def S12 : Rs<12, "s12">, DwarfRegNum<[156]>; + def S13 : Rs<13, "s13">, DwarfRegNum<[157]>; + def S14 : Rs<14, "s14">, DwarfRegNum<[158]>; + def S15 : Rs<15, "s15">, DwarfRegNum<[159]>; + def EVB : Rs<16, "evb", ["s16"]>, DwarfRegNum<[160]>; + def MODECTL : Rs<17, "modectl", ["s17"]>, DwarfRegNum<[161]>; + def SYSCFG : Rs<18, "syscfg", ["s18"]>, DwarfRegNum<[162]>; + def S19 : Rs<19, "s19", ["s19"]>, DwarfRegNum<[163]>; + def S20 : Rs<20, "s20", ["s20"]>, DwarfRegNum<[164]>; + def VID : Rs<21, "vid", ["s21"]>, DwarfRegNum<[165]>; + def S22 : Rs<22, "s22", ["s22"]>, DwarfRegNum<[166]>; + def S23 : Rs<23, "s23">, DwarfRegNum<[167]>; + def S24 : Rs<24, "s24">, DwarfRegNum<[168]>; + def S25 : Rs<25, "s25">, DwarfRegNum<[169]>; + def S26 : Rs<26, "s26">, DwarfRegNum<[170]>; + def CFGBASE : Rs<27, "cfgbase", ["s27"]>, DwarfRegNum<[171]>; + def DIAG : Rs<28, "diag", ["s28"]>, DwarfRegNum<[172]>; + def REV : Rs<29, 
"rev", ["s29"]>, DwarfRegNum<[173]>; + def PCYCLELO : Rs<30, "pcyclelo", ["s30"]>, DwarfRegNum<[174]>; + def PCYCLEHI : Rs<31, "pcyclehi", ["s31"]>, DwarfRegNum<[175]>; + def ISDBST : Rs<32, "isdbst", ["s32"]>, DwarfRegNum<[176]>; + def ISDBCFG0 : Rs<33, "isdbcfg0", ["s33"]>, DwarfRegNum<[177]>; + def ISDBCFG1 : Rs<34, "isdbcfg1", ["s34"]>, DwarfRegNum<[178]>; + def S35 : Rs<35, "s35">, DwarfRegNum<[179]>; + def BRKPTPC0 : Rs<36, "brkptpc0", ["s36"]>, DwarfRegNum<[180]>; + def BRKPTCFG0 : Rs<37, "brkptcfg0", ["s37"]>, DwarfRegNum<[181]>; + def BRKPTPC1 : Rs<38, "brkptpc1", ["s38"]>, DwarfRegNum<[182]>; + def BRKPTCFG1 : Rs<39, "brkptcfg1", ["s39"]>, DwarfRegNum<[183]>; + def ISDBMBXIN : Rs<40, "isdbmbxin", ["s40"]>, DwarfRegNum<[184]>; + def ISDBMBXOUT : Rs<41, "isdbmbxout", ["s41"]>, DwarfRegNum<[185]>; + def ISDBEN : Rs<42, "isdben", ["s42"]>, DwarfRegNum<[186]>; + def ISDBGPR : Rs<43, "isdbgpr", ["s43"]>, DwarfRegNum<[187]>; + def S44 : Rs<44, "s44">, DwarfRegNum<[188]>; + def S45 : Rs<45, "s45">, DwarfRegNum<[189]>; + def S46 : Rs<46, "s46">, DwarfRegNum<[190]>; + def S47 : Rs<47, "s47">, DwarfRegNum<[191]>; + def PMUCNT0 : Rs<48, "pmucnt0", ["s48"]>, DwarfRegNum<[192]>; + def PMUCNT1 : Rs<49, "pmucnt1", ["s49"]>, DwarfRegNum<[193]>; + def PMUCNT2 : Rs<50, "pmucnt2", ["s50"]>, DwarfRegNum<[194]>; + def PMUCNT3 : Rs<51, "pmucnt3", ["s51"]>, DwarfRegNum<[195]>; + def PMUEVTCFG : Rs<52, "pmuevtcfg", ["s52"]>, DwarfRegNum<[196]>; + def PMUCFG : Rs<53, "pmucfg", ["s53"]>, DwarfRegNum<[197]>; + def S54 : Rs<54, "s54">, DwarfRegNum<[198]>; + def S55 : Rs<55, "s55">, DwarfRegNum<[199]>; + def S56 : Rs<56, "s56">, DwarfRegNum<[200]>; + def S57 : Rs<57, "s57">, DwarfRegNum<[201]>; + def S58 : Rs<58, "s58">, DwarfRegNum<[202]>; + def S59 : Rs<59, "s59">, DwarfRegNum<[203]>; + def S60 : Rs<60, "s60">, DwarfRegNum<[204]>; + def S61 : Rs<61, "s61">, DwarfRegNum<[205]>; + def S62 : Rs<62, "s62">, DwarfRegNum<[206]>; + def S63 : Rs<63, "s63">, DwarfRegNum<[207]>; + def S64 : Rs<64, "s64">, DwarfRegNum<[208]>; + def S65 : Rs<65, "s65">, DwarfRegNum<[209]>; + def S66 : Rs<66, "s66">, DwarfRegNum<[210]>; + def S67 : Rs<67, "s67">, DwarfRegNum<[211]>; + def S68 : Rs<68, "s68">, DwarfRegNum<[212]>; + def S69 : Rs<69, "s69">, DwarfRegNum<[213]>; + def S70 : Rs<70, "s70">, DwarfRegNum<[214]>; + def S71 : Rs<71, "s71">, DwarfRegNum<[215]>; + def S72 : Rs<72, "s72">, DwarfRegNum<[216]>; + def S73 : Rs<73, "s73">, DwarfRegNum<[217]>; + def S74 : Rs<74, "s74">, DwarfRegNum<[218]>; + def S75 : Rs<75, "s75">, DwarfRegNum<[219]>; + def S76 : Rs<76, "s76">, DwarfRegNum<[220]>; + def S77 : Rs<77, "s77">, DwarfRegNum<[221]>; + def S78 : Rs<78, "s78">, DwarfRegNum<[222]>; + def S79 : Rs<79, "s79">, DwarfRegNum<[223]>; + def S80 : Rs<80, "s80">, DwarfRegNum<[224]>; // System Register Pair let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { - def SGP1_0 : Rss<0, "s1:0", [SGP0, SGP1], ["sgp1:0"]>, DwarfRegNum<[144]>; - def S3_2 : Rss<2, "s3:2", [STID, ELR]>, DwarfRegNum<[146]>; - def S5_4 : Rss<4, "s5:4", [BADVA0, BADVA1], ["badva1:0"]>, - DwarfRegNum<[148]>; - def S7_6 : Rss<6, "s7:6", [SSR, CCR], ["ccr:ssr"]>, DwarfRegNum<[150]>; - def S9_8 : Rss<8, "s9:8", [HTID, BADVA]>, DwarfRegNum<[152]>; - def S11_10 : Rss<10, "s11:10", [IMASK, S11]>, DwarfRegNum<[154]>; - def S13_12 : Rss<12, "s13:12", [S12, S13]>, DwarfRegNum<[156]>; - def S15_14 : Rss<14, "s15:14", [S14, S15]>, DwarfRegNum<[158]>; - def S17_16 : Rss<16, "s17:16", [EVB, MODECTL]>, DwarfRegNum<[160]>; - def S19_18 : Rss<18, "s19:18", [SYSCFG, S19]>, 
DwarfRegNum<[162]>; - def S21_20 : Rss<20, "s21:20", [S20, VID]>, DwarfRegNum<[164]>; - def S23_22 : Rss<22, "s23:22", [S22, S23]>, DwarfRegNum<[166]>; - def S25_24 : Rss<24, "s25:24", [S24, S25]>, DwarfRegNum<[168]>; - def S27_26 : Rss<26, "s27:26", [S26, CFGBASE]>, DwarfRegNum<[170]>; - def S29_28 : Rss<28, "s29:28", [DIAG, REV]>, DwarfRegNum<[172]>; - def S31_30 : Rss<30, "s31:30", [PCYCLELO, PCYCLEHI], ["pcycle"]>, DwarfRegNum<[174]>; - def S33_32 : Rss<32, "s33:32", [ISDBST, ISDBCFG0]>, DwarfRegNum<[176]>; - def S35_34 : Rss<34, "s35:34", [ISDBCFG1, S35]>, DwarfRegNum<[178]>; - def S37_36 : Rss<36, "s37:36", [BRKPTPC0, BRKPTCFG0]>, DwarfRegNum<[180]>; - def S39_38 : Rss<38, "s39:38", [BRKPTPC1, BRKPTCFG1]>, DwarfRegNum<[182]>; + def SGP1_0 : Rss<0, "s1:0", [SGP0, SGP1], ["sgp1:0"]>, DwarfRegNum<[144]>; + def S3_2 : Rss<2, "s3:2", [STID, ELR]>, DwarfRegNum<[146]>; + def S5_4 : Rss<4, "s5:4", [BADVA0, BADVA1], ["badva1:0"]>, + DwarfRegNum<[148]>; + def S7_6 : Rss<6, "s7:6", [SSR, CCR], ["ccr:ssr"]>, DwarfRegNum<[150]>; + def S9_8 : Rss<8, "s9:8", [HTID, BADVA]>, DwarfRegNum<[152]>; + def S11_10 : Rss<10, "s11:10", [IMASK, S11]>, DwarfRegNum<[154]>; + def S13_12 : Rss<12, "s13:12", [S12, S13]>, DwarfRegNum<[156]>; + def S15_14 : Rss<14, "s15:14", [S14, S15]>, DwarfRegNum<[158]>; + def S17_16 : Rss<16, "s17:16", [EVB, MODECTL]>, DwarfRegNum<[160]>; + def S19_18 : Rss<18, "s19:18", [SYSCFG, S19]>, DwarfRegNum<[162]>; + def S21_20 : Rss<20, "s21:20", [S20, VID]>, DwarfRegNum<[164]>; + def S23_22 : Rss<22, "s23:22", [S22, S23]>, DwarfRegNum<[166]>; + def S25_24 : Rss<24, "s25:24", [S24, S25]>, DwarfRegNum<[168]>; + def S27_26 : Rss<26, "s27:26", [S26, CFGBASE]>, DwarfRegNum<[170]>; + def S29_28 : Rss<28, "s29:28", [DIAG, REV]>, DwarfRegNum<[172]>; + def S31_30 : Rss<30, "s31:30", [PCYCLELO, PCYCLEHI], ["pcycle"]>, + DwarfRegNum<[174]>; + def S33_32 : Rss<32, "s33:32", [ISDBST, ISDBCFG0]>, DwarfRegNum<[176]>; + def S35_34 : Rss<34, "s35:34", [ISDBCFG1, S35]>, DwarfRegNum<[178]>; + def S37_36 : Rss<36, "s37:36", [BRKPTPC0, BRKPTCFG0]>, DwarfRegNum<[180]>; + def S39_38 : Rss<38, "s39:38", [BRKPTPC1, BRKPTCFG1]>, DwarfRegNum<[182]>; def S41_40 : Rss<40, "s41:40", [ISDBMBXIN, ISDBMBXOUT]>, DwarfRegNum<[184]>; - def S43_42 : Rss<42, "s43:42", [ISDBEN, ISDBGPR]>, DwarfRegNum<[186]>; - def S45_44 : Rss<44, "s45:44", [S44, S45]>, DwarfRegNum<[188]>; - def S47_46 : Rss<46, "s47:46", [S46, S47]>, DwarfRegNum<[190]>; - def S49_48 : Rss<48, "s49:48", [PMUCNT0, PMUCNT1]>, DwarfRegNum<[192]>; - def S51_50 : Rss<50, "s51:50", [PMUCNT2, PMUCNT3]>, DwarfRegNum<[194]>; - def S53_52 : Rss<52, "s53:52", [PMUEVTCFG, PMUCFG]>, DwarfRegNum<[196]>; - def S55_54 : Rss<54, "s55:54", [S54, S55]>, DwarfRegNum<[198]>; - def S57_56 : Rss<56, "s57:56", [S56, S57]>, DwarfRegNum<[200]>; - def S59_58 : Rss<58, "s59:58", [S58, S59]>, DwarfRegNum<[202]>; - def S61_60 : Rss<60, "s61:60", [S60, S61]>, DwarfRegNum<[204]>; - def S63_62 : Rss<62, "s63:62", [S62, S63]>, DwarfRegNum<[206]>; - def S65_64 : Rss<64, "s65:64", [S64, S65]>, DwarfRegNum<[208]>; - def S67_66 : Rss<66, "s67:66", [S66, S67]>, DwarfRegNum<[210]>; - def S69_68 : Rss<68, "s69:68", [S68, S69]>, DwarfRegNum<[212]>; - def S71_70 : Rss<70, "s71:70", [S70, S71]>, DwarfRegNum<[214]>; - def S73_72 : Rss<72, "s73:72", [S72, S73]>, DwarfRegNum<[216]>; - def S75_74 : Rss<74, "s75:74", [S74, S75]>, DwarfRegNum<[218]>; - def S77_76 : Rss<76, "s77:76", [S77, S76]>, DwarfRegNum<[219]>; - def S79_78 : Rss<78, "s79:78", [S79, S78]>, DwarfRegNum<[220]>; + def S43_42 : Rss<42, 
"s43:42", [ISDBEN, ISDBGPR]>, DwarfRegNum<[186]>; + def S45_44 : Rss<44, "s45:44", [S44, S45]>, DwarfRegNum<[188]>; + def S47_46 : Rss<46, "s47:46", [S46, S47]>, DwarfRegNum<[190]>; + def S49_48 : Rss<48, "s49:48", [PMUCNT0, PMUCNT1]>, DwarfRegNum<[192]>; + def S51_50 : Rss<50, "s51:50", [PMUCNT2, PMUCNT3]>, DwarfRegNum<[194]>; + def S53_52 : Rss<52, "s53:52", [PMUEVTCFG, PMUCFG]>, DwarfRegNum<[196]>; + def S55_54 : Rss<54, "s55:54", [S54, S55]>, DwarfRegNum<[198]>; + def S57_56 : Rss<56, "s57:56", [S56, S57]>, DwarfRegNum<[200]>; + def S59_58 : Rss<58, "s59:58", [S58, S59]>, DwarfRegNum<[202]>; + def S61_60 : Rss<60, "s61:60", [S60, S61]>, DwarfRegNum<[204]>; + def S63_62 : Rss<62, "s63:62", [S62, S63]>, DwarfRegNum<[206]>; + def S65_64 : Rss<64, "s65:64", [S64, S65]>, DwarfRegNum<[208]>; + def S67_66 : Rss<66, "s67:66", [S66, S67]>, DwarfRegNum<[210]>; + def S69_68 : Rss<68, "s69:68", [S68, S69]>, DwarfRegNum<[212]>; + def S71_70 : Rss<70, "s71:70", [S70, S71]>, DwarfRegNum<[214]>; + def S73_72 : Rss<72, "s73:72", [S72, S73]>, DwarfRegNum<[216]>; + def S75_74 : Rss<74, "s75:74", [S74, S75]>, DwarfRegNum<[218]>; + def S77_76 : Rss<76, "s77:76", [S77, S76]>, DwarfRegNum<[219]>; + def S79_78 : Rss<78, "s79:78", [S79, S78]>, DwarfRegNum<[220]>; } // Guest Registers - def GELR: Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>; - def GSR: Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>; - def GOSP: Rg<2, "gosp", ["g2"]>, DwarfRegNum<[222]>; - def G3: Rg<3, "gbadva", ["g3"]>, DwarfRegNum<[223]>; - def G4: Rg<4, "g4">, DwarfRegNum<[224]>; - def G5: Rg<5, "g5">, DwarfRegNum<[225]>; - def G6: Rg<6, "g6">, DwarfRegNum<[226]>; - def G7: Rg<7, "g7">, DwarfRegNum<[227]>; - def G8: Rg<8, "g8">, DwarfRegNum<[228]>; - def G9: Rg<9, "g9">, DwarfRegNum<[229]>; - def G10: Rg<10, "g10">, DwarfRegNum<[230]>; - def G11: Rg<11, "g11">, DwarfRegNum<[231]>; - def G12: Rg<12, "g12">, DwarfRegNum<[232]>; - def G13: Rg<13, "g13">, DwarfRegNum<[233]>; - def G14: Rg<14, "g14">, DwarfRegNum<[234]>; - def G15: Rg<15, "g15">, DwarfRegNum<[235]>; - def GPMUCNT4: Rg<16, "gpmucnt4", ["g16"]>, DwarfRegNum<[236]>; - def GPMUCNT5: Rg<17, "gpmucnt5", ["g17"]>, DwarfRegNum<[237]>; - def GPMUCNT6: Rg<18, "gpmucnt6", ["g18"]>, DwarfRegNum<[238]>; - def GPMUCNT7: Rg<19, "gpmucnt7", ["g19"]>, DwarfRegNum<[239]>; - def G20: Rg<20, "g20">, DwarfRegNum<[240]>; - def G21: Rg<21, "g21">, DwarfRegNum<[241]>; - def G22: Rg<22, "g22">, DwarfRegNum<[242]>; - def G23: Rg<23, "g23">, DwarfRegNum<[243]>; - def GPCYCLELO: Rg<24, "gpcyclelo", ["g24"]>, DwarfRegNum<[244]>; - def GPCYCLEHI: Rg<25, "gpcyclehi", ["g25"]>, DwarfRegNum<[245]>; - def GPMUCNT0: Rg<26, "gpmucnt0", ["g26"]>, DwarfRegNum<[246]>; - def GPMUCNT1: Rg<27, "gpmucnt1", ["g27"]>, DwarfRegNum<[247]>; - def GPMUCNT2: Rg<28, "gpmucnt2", ["g28"]>, DwarfRegNum<[248]>; - def GPMUCNT3: Rg<29, "gpmucnt3", ["g29"]>, DwarfRegNum<[249]>; - def G30: Rg<30, "g30">, DwarfRegNum<[250]>; - def G31: Rg<31, "g31">, DwarfRegNum<[251]>; + def GELR : Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>; + def GSR : Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>; + def GOSP : Rg<2, "gosp", ["g2"]>, DwarfRegNum<[222]>; + def G3 : Rg<3, "gbadva", ["g3"]>, DwarfRegNum<[223]>; + def G4 : Rg<4, "g4">, DwarfRegNum<[224]>; + def G5 : Rg<5, "g5">, DwarfRegNum<[225]>; + def G6 : Rg<6, "g6">, DwarfRegNum<[226]>; + def G7 : Rg<7, "g7">, DwarfRegNum<[227]>; + def G8 : Rg<8, "g8">, DwarfRegNum<[228]>; + def G9 : Rg<9, "g9">, DwarfRegNum<[229]>; + def G10 : Rg<10, "g10">, DwarfRegNum<[230]>; + def G11 : Rg<11, "g11">, DwarfRegNum<[231]>; + def 
G12 : Rg<12, "g12">, DwarfRegNum<[232]>; + def G13 : Rg<13, "g13">, DwarfRegNum<[233]>; + def G14 : Rg<14, "g14">, DwarfRegNum<[234]>; + def G15 : Rg<15, "g15">, DwarfRegNum<[235]>; + def GPMUCNT4 : Rg<16, "gpmucnt4", ["g16"]>, DwarfRegNum<[236]>; + def GPMUCNT5 : Rg<17, "gpmucnt5", ["g17"]>, DwarfRegNum<[237]>; + def GPMUCNT6 : Rg<18, "gpmucnt6", ["g18"]>, DwarfRegNum<[238]>; + def GPMUCNT7 : Rg<19, "gpmucnt7", ["g19"]>, DwarfRegNum<[239]>; + def G20 : Rg<20, "g20">, DwarfRegNum<[240]>; + def G21 : Rg<21, "g21">, DwarfRegNum<[241]>; + def G22 : Rg<22, "g22">, DwarfRegNum<[242]>; + def G23 : Rg<23, "g23">, DwarfRegNum<[243]>; + def GPCYCLELO : Rg<24, "gpcyclelo", ["g24"]>, DwarfRegNum<[244]>; + def GPCYCLEHI : Rg<25, "gpcyclehi", ["g25"]>, DwarfRegNum<[245]>; + def GPMUCNT0 : Rg<26, "gpmucnt0", ["g26"]>, DwarfRegNum<[246]>; + def GPMUCNT1 : Rg<27, "gpmucnt1", ["g27"]>, DwarfRegNum<[247]>; + def GPMUCNT2 : Rg<28, "gpmucnt2", ["g28"]>, DwarfRegNum<[248]>; + def GPMUCNT3 : Rg<29, "gpmucnt3", ["g29"]>, DwarfRegNum<[249]>; + def G30 : Rg<30, "g30">, DwarfRegNum<[250]>; + def G31 : Rg<31, "g31">, DwarfRegNum<[251]>; // Guest Register Pairs let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { - def G1_0 : Rgg<0, "g1:0", [GELR, GSR]>, DwarfRegNum<[220]>; - def G3_2 : Rgg<2, "g3:2", [GOSP, G3]>, DwarfRegNum<[222]>; - def G5_4 : Rgg<4, "g5:4", [G4, G5]>, DwarfRegNum<[224]>; - def G7_6 : Rgg<6, "g7:6", [G6, G7]>, DwarfRegNum<[226]>; - def G9_8 : Rgg<8, "g9:8", [G8, G9]>, DwarfRegNum<[228]>; - def G11_10 : Rgg<10, "g11:10", [G10, G11]>, DwarfRegNum<[230]>; - def G13_12 : Rgg<12, "g13:12", [G12, G13]>, DwarfRegNum<[232]>; - def G15_14 : Rgg<14, "g15:14", [G14, G15]>, DwarfRegNum<[234]>; - def G17_16 : Rgg<16, "g17:16", [GPMUCNT4, GPMUCNT5]>, DwarfRegNum<[236]>; - def G19_18 : Rgg<18, "g19:18", [GPMUCNT6, GPMUCNT7]>, DwarfRegNum<[238]>; - def G21_20 : Rgg<20, "g21:20", [G20, G21]>, DwarfRegNum<[240]>; - def G23_22 : Rgg<22, "g23:22", [G22, G23]>, DwarfRegNum<[242]>; + def G1_0 : Rgg<0, "g1:0", [GELR, GSR]>, DwarfRegNum<[220]>; + def G3_2 : Rgg<2, "g3:2", [GOSP, G3]>, DwarfRegNum<[222]>; + def G5_4 : Rgg<4, "g5:4", [G4, G5]>, DwarfRegNum<[224]>; + def G7_6 : Rgg<6, "g7:6", [G6, G7]>, DwarfRegNum<[226]>; + def G9_8 : Rgg<8, "g9:8", [G8, G9]>, DwarfRegNum<[228]>; + def G11_10 : Rgg<10, "g11:10", [G10, G11]>, DwarfRegNum<[230]>; + def G13_12 : Rgg<12, "g13:12", [G12, G13]>, DwarfRegNum<[232]>; + def G15_14 : Rgg<14, "g15:14", [G14, G15]>, DwarfRegNum<[234]>; + def G17_16 : Rgg<16, "g17:16", [GPMUCNT4, GPMUCNT5]>, DwarfRegNum<[236]>; + def G19_18 : Rgg<18, "g19:18", [GPMUCNT6, GPMUCNT7]>, DwarfRegNum<[238]>; + def G21_20 : Rgg<20, "g21:20", [G20, G21]>, DwarfRegNum<[240]>; + def G23_22 : Rgg<22, "g23:22", [G22, G23]>, DwarfRegNum<[242]>; def G25_24 : Rgg<24, "g25:24", [GPCYCLELO, GPCYCLEHI]>, DwarfRegNum<[244]>; - def G27_26 : Rgg<26, "g27:26", [GPMUCNT0, GPMUCNT1]>, DwarfRegNum<[246]>; - def G29_28 : Rgg<28, "g29:28", [GPMUCNT2, GPMUCNT3]>, DwarfRegNum<[248]>; - def G31_30 : Rgg<30, "g31:30", [G30, G31]>, DwarfRegNum<[250]>; + def G27_26 : Rgg<26, "g27:26", [GPMUCNT0, GPMUCNT1]>, DwarfRegNum<[246]>; + def G29_28 : Rgg<28, "g29:28", [GPMUCNT2, GPMUCNT3]>, DwarfRegNum<[248]>; + def G31_30 : Rgg<30, "g31:30", [G30, G31]>, DwarfRegNum<[250]>; } - } // HVX types -def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64i1, v128i1, v64i1]>; -def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64i8, v128i8, v64i8]>; -def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - 
[v32i16, v64i16, v32i16]>; -def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v16i32, v32i32, v16i32]>; -def VecF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v32f16, v64f16, v32f16]>; -def VecF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v16f32, v32f32, v16f32]>; - -def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v128i8, v256i8, v128i8]>; -def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64i16, v128i16, v64i16]>; -def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v32i32, v64i32, v32i32]>; -def VecPF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64f16, v128f16, v64f16]>; -def VecPF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v32f32, v64f32, v32f32]>; - -def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64i1, v128i1, v64i1]>; -def VecQ16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v32i1, v64i1, v32i1]>; -def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v16i1, v32i1, v16i1]>; +def VecI1 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i1, v128i1, v64i1]>; +def VecI8 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i8, v128i8, v64i8]>; +def VecI16 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i16, v64i16, v32i16]>; +def VecI32 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16i32, v32i32, v16i32]>; +def VecF16 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32f16, v64f16, v32f16]>; +def VecF32 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16f32, v32f32, v16f32]>; +def VecBF16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32bf16, v64bf16, + v32bf16]>; + +def VecPI8 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v128i8, v256i8, v128i8]>; +def VecPI16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i16, v128i16, + v64i16]>; +def VecPI32 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i32, v64i32, v32i32]>; +def VecPF16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64f16, v128f16, + v64f16]>; +def VecPF32 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32f32, v64f32, v32f32]>; +def VecPBF16 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64bf16, v128bf16, + v64bf16]>; + +def VecQ8 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i1, v128i1, v64i1]>; +def VecQ16 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i1, v64i1, v32i1]>; +def VecQ32 + : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16i1, v32i1, v16i1]>; // HVX register classes -def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecF32], 512, - (add (sequence "V%u", 0, 31), VTMP)> { - let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], - [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>; +def HvxVR + : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecBF16, VecF32], + 512, (add (sequence "V%u", 0, 31), VTMP)> { + let RegInfos = + RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<512, 512, 512>, + RegInfo<1024, 1024, 1024>, + RegInfo<512, 512, 512>]>; } -def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPF32], 1024, - (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> { - let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], - [RegInfo<1024,1024,512>, RegInfo<2048,2048,1024>, RegInfo<1024,1024,512>]>; +def HvxWR + : RegisterClass< + "Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPBF16, VecPF32], + 1024, (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> { + let RegInfos = + RegInfoByHwMode<[Hvx64, 
Hvx128, DefaultMode], [RegInfo<1024, 1024, 512>, + RegInfo<2048, 2048, 1024>, + RegInfo<1024, 1024, 512>]>; } def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 128, - (add Q0, Q1, Q2, Q3)> { - let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], - [RegInfo<64,512,512>, RegInfo<128,1024,1024>, RegInfo<64,512,512>]>; + (add Q0, Q1, Q2, Q3)> { + let RegInfos = + RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<64, 512, 512>, + RegInfo<128, 1024, 1024>, + RegInfo<64, 512, 512>]>; } -def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048, - (add (sequence "VQ%u", 0, 7))> { - let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], - [RegInfo<2048,2048,512>, RegInfo<4096,4096,1024>, RegInfo<2048,2048,512>]>; +def HvxVQR + : RegisterClass<"Hexagon", [untyped], 2048, (add (sequence "VQ%u", 0, 7))> { + let RegInfos = + RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<2048, 2048, 512>, + RegInfo<4096, 4096, 1024>, + RegInfo<2048, 2048, 512>]>; } // Core register classes -def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, - (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), - R10, R11, R29, R30, R31)>; +def IntRegs + : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, + (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10, + R11, R29, R30, R31)>; // Registers are listed in reverse order for allocation preference reasons. def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32, - (add R23, R22, R21, R20, R19, R18, R17, R16, - R7, R6, R5, R4, R3, R2, R1, R0)>; + (add R23, R22, R21, R20, R19, R18, R17, R16, + R7, R6, R5, R4, R3, R2, R1, R0)>; -def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, - (add R7, R6, R5, R4, R3, R2, R1, R0)> ; +def IntRegsLow8 + : RegisterClass<"Hexagon", [i32], 32, (add R7, R6, R5, R4, R3, R2, R1, R0)>; def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, - (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; - -def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64, - (add D11, D10, D9, D8, D3, D2, D1, D0)>; - -let Size = 32 in -def PredRegs : RegisterClass<"Hexagon", - [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, (add P0, P1, P2, P3)>; - -let Size = 32 in -def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; - -let Size = 32, isAllocatable = 0 in -def CtrRegs : RegisterClass<"Hexagon", [i32], 32, - (add LC0, SA0, LC1, SA1, P3_0, C5, C8, PC, UGP, GP, CS0, CS1, - UPCYCLELO, UPCYCLEHI, - FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI, - M0, M1, USR)>; - -let Size = 64 in -def VectRegRev : RegisterClass<"Hexagon", [i64], 64, - (add (sequence "WR%u", 0, 15))>; - -let isAllocatable = 0 in -def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>; - -let Size = 64, isAllocatable = 0 in -def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64, - (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, C17_16, - PKTCOUNT, UTIMER)>; - -let Size = 32, isAllocatable = 0 in -def GuestRegs : RegisterClass<"Hexagon", [i32], 32, - (add GELR, GSR, GOSP, - (sequence "G%u", 3, 15), - GPMUCNT4, GPMUCNT5, GPMUCNT6, GPMUCNT7, - G20, G21, G22, G23, - GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1, - GPMUCNT2, GPMUCNT3, - G30, G31)>; - -let Size = 64, isAllocatable = 0 in -def GuestRegs64 : RegisterClass<"Hexagon", [i64], 64, - (add G1_0, G3_2, - G5_4, G7_6, G9_8, G11_10, G13_12, G15_14, - G17_16, G19_18, - G21_20, G23_22, - G25_24, G27_26, G29_28, - G31_30)>; - -let Size = 32, isAllocatable = 0 in -def SysRegs : RegisterClass<"Hexagon", [i32], 32, - 
(add SGP0, SGP1, STID, ELR, BADVA0, BADVA1, - SSR, CCR, HTID, BADVA, IMASK, - S11, S12, S13, S14, S15, - S19, S23, S25, - EVB, MODECTL, SYSCFG, S20, VID, S22, S24, - S26, CFGBASE, DIAG, REV, PCYCLEHI, - PCYCLELO, ISDBST, ISDBCFG0, ISDBCFG1, S35, - BRKPTPC0, BRKPTCFG0, BRKPTPC1, BRKPTCFG1, - ISDBMBXIN, ISDBMBXOUT, ISDBEN, ISDBGPR, - S44, S45, S46, S47, - PMUCNT0, PMUCNT1, PMUCNT2, PMUCNT3, - PMUEVTCFG, PMUCFG, S54, S55, S56, S57, - S58, S59, S60, S61, S62, S63, S64, S65, S66, S67, - S68, S69, S70, S71, S72, S73, S74, S75, S76, S77, - S78, S79, S80 - )>; - -let Size = 64, isAllocatable = 0 in -def SysRegs64 : RegisterClass<"Hexagon", [i64], 64, - (add SGP1_0, - S3_2, S5_4, S7_6, S9_8, - S11_10, S13_12, S15_14, - S17_16, S19_18, S21_20, - S23_22, S25_24, - S27_26, S29_28, S31_30, S33_32, S35_34, - S37_36, S39_38, S41_40, S43_42, S45_44, - S47_46, S49_48, S51_50, S53_52, - S55_54, S57_56, S59_58, - S61_60, S63_62, S65_64, S67_66, S69_68, - S71_70, S73_72, S75_74, S77_76, S79_78 - )>; + (add (sequence "D%u", 0, 4), + (sequence "D%u", 6, 13), D5, D14, D15)>; + +def GeneralDoubleLow8Regs + : RegisterClass<"Hexagon", [i64], 64, + (add D11, D10, D9, D8, D3, D2, D1, D0)>; + +let Size = 32 in def PredRegs + : RegisterClass<"Hexagon", [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, + (add P0, P1, P2, P3)>; + +let Size = + 32 in def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; + +let Size = 32, isAllocatable = 0 in def CtrRegs + : RegisterClass<"Hexagon", [i32], 32, + (add LC0, SA0, LC1, SA1, P3_0, C5, C8, PC, UGP, GP, CS0, + CS1, UPCYCLELO, UPCYCLEHI, FRAMELIMIT, FRAMEKEY, + PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI, M0, M1, + USR)>; + +let Size = 64 in def VectRegRev + : RegisterClass<"Hexagon", [i64], 64, (add (sequence "WR%u", 0, 15))>; + +let isAllocatable = + 0 in def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>; + +let Size = 64, isAllocatable = 0 in def CtrRegs64 + : RegisterClass<"Hexagon", [i64], 64, + (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, + C17_16, PKTCOUNT, UTIMER)>; + +let Size = 32, isAllocatable = 0 in def GuestRegs + : RegisterClass<"Hexagon", [i32], 32, + (add GELR, GSR, GOSP, (sequence "G%u", 3, 15), GPMUCNT4, + GPMUCNT5, GPMUCNT6, GPMUCNT7, G20, G21, G22, G23, + GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1, GPMUCNT2, + GPMUCNT3, G30, G31)>; + +let Size = 64, isAllocatable = 0 in def GuestRegs64 + : RegisterClass<"Hexagon", [i64], 64, + (add G1_0, G3_2, G5_4, G7_6, G9_8, G11_10, G13_12, G15_14, + G17_16, G19_18, G21_20, G23_22, G25_24, G27_26, G29_28, + G31_30)>; + +let Size = 32, isAllocatable = 0 in def SysRegs + : RegisterClass<"Hexagon", [i32], 32, + (add SGP0, SGP1, STID, ELR, BADVA0, BADVA1, SSR, CCR, HTID, + BADVA, IMASK, S11, S12, S13, S14, S15, S19, S23, S25, + EVB, MODECTL, SYSCFG, S20, VID, S22, S24, S26, CFGBASE, + DIAG, REV, PCYCLEHI, PCYCLELO, ISDBST, ISDBCFG0, + ISDBCFG1, S35, BRKPTPC0, BRKPTCFG0, BRKPTPC1, BRKPTCFG1, + ISDBMBXIN, ISDBMBXOUT, ISDBEN, ISDBGPR, S44, S45, S46, + S47, PMUCNT0, PMUCNT1, PMUCNT2, PMUCNT3, PMUEVTCFG, + PMUCFG, S54, S55, S56, S57, S58, S59, S60, S61, S62, + S63, S64, S65, S66, S67, S68, S69, S70, S71, S72, S73, + S74, S75, S76, S77, S78, S79, S80)>; + +let Size = 64, isAllocatable = 0 in def SysRegs64 + : RegisterClass<"Hexagon", [i64], 64, + (add SGP1_0, S3_2, S5_4, S7_6, S9_8, S11_10, S13_12, S15_14, + S17_16, S19_18, S21_20, S23_22, S25_24, S27_26, S29_28, + S31_30, S33_32, S35_34, S37_36, S39_38, S41_40, S43_42, + S45_44, S47_46, S49_48, S51_50, S53_52, S55_54, S57_56, + S59_58, S61_60, S63_62, 
S65_64, S67_66, S69_68, S71_70, + S73_72, S75_74, S77_76, S79_78)>; // These registers are new for v62 and onward. // The function RegisterMatchesArch() uses this list for validation. -let isAllocatable = 0 in -def V62Regs : RegisterClass<"Hexagon", [i32], 32, - (add FRAMELIMIT, FRAMEKEY, C17_16, PKTCOUNTLO, PKTCOUNTHI, PKTCOUNT, - UTIMERLO, UTIMERHI, UTIMER)>; +let isAllocatable = 0 in def V62Regs + : RegisterClass<"Hexagon", [i32], 32, + (add FRAMELIMIT, FRAMEKEY, C17_16, PKTCOUNTLO, PKTCOUNTHI, + PKTCOUNT, UTIMERLO, UTIMERHI, UTIMER)>; // These registers are new for v65 and onward. -let Size = 32, isAllocatable = 0 in -def V65Regs : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>; - +let Size = 32, isAllocatable = 0 in def V65Regs + : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>; -def HexagonCSR - : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23, - R24, R25, R26, R27)>; +def HexagonCSR : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23, + R24, R25, R26, R27)>; diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index ce2de75..66c8b0a 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -28,7 +28,6 @@ #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> -#include <map> #include <optional> using namespace llvm; @@ -77,8 +76,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, OptLevel(TM.getOptLevel()), CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - RegInfo(getHwMode()), TLInfo(TM, *this), - InstrItins(getInstrItineraryForCPU(CPUString)) { + TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) { Hexagon_MC::addArchSubtarget(this, FS); // Beware of the default constructor of InstrItineraryData: it will // reset all members to 0. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 995f66d..dde3229 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -100,7 +100,6 @@ private: // The following objects can use the TargetTriple, so they must be // declared after it. 
HexagonInstrInfo InstrInfo; - HexagonRegisterInfo RegInfo; HexagonTargetLowering TLInfo; HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; @@ -122,7 +121,7 @@ public: } const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; } const HexagonRegisterInfo *getRegisterInfo() const override { - return &RegInfo; + return &InstrInfo.getRegisterInfo(); } const HexagonTargetLowering *getTargetLowering() const override { return &TLInfo; @@ -345,7 +344,11 @@ public: ArrayRef<MVT> getHVXElementTypes() const { static MVT Types[] = {MVT::i8, MVT::i16, MVT::i32}; static MVT TypesV68[] = {MVT::i8, MVT::i16, MVT::i32, MVT::f16, MVT::f32}; + static MVT TypesV81[] = {MVT::i8, MVT::i16, MVT::i32, + MVT::f16, MVT::bf16, MVT::f32}; + if (useHVXV81Ops() && useHVXFloatingPoint()) + return ArrayRef(TypesV81); if (useHVXV68Ops() && useHVXFloatingPoint()) return ArrayRef(TypesV68); return ArrayRef(Types); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index d9824a31..d98fe80 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -48,6 +48,14 @@ static cl::opt<bool> cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt<bool> + EnableGenWideningVec("hexagon-widening-vectors", cl::init(true), cl::Hidden, + cl::desc("Generate widening vector instructions")); + +static cl::opt<bool> + EnableOptShuffleVec("hexagon-opt-shuffvec", cl::init(true), cl::Hidden, + cl::desc("Enable optimization of shuffle vectors")); + +static cl::opt<bool> DisableAModeOpt("disable-hexagon-amodeopt", cl::Hidden, cl::desc("Disable Hexagon Addressing Mode Optimization")); @@ -321,6 +329,8 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } void HexagonPassConfig::addIRPasses() { + HexagonTargetMachine &HTM = getHexagonTargetMachine(); + TargetPassConfig::addIRPasses(); bool NoOpt = (getOptLevel() == CodeGenOptLevel::None); @@ -350,6 +360,13 @@ void HexagonPassConfig::addIRPasses() { // Replace certain combinations of shifts and ands with extracts. 
if (EnableGenExtract) addPass(createHexagonGenExtract()); + if (EnableGenWideningVec) { + addPass(createHexagonGenWideningVecInstr(HTM)); + addPass(createHexagonGenWideningVecFloatInstr(HTM)); + addPass(createDeadCodeEliminationPass()); + } + if (EnableOptShuffleVec) + addPass(createHexagonOptShuffleVector(HTM)); } } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index e925e04..59c6201 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -224,14 +224,6 @@ InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } InstructionCost -HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - Align Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind) const { - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); -} - -InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, @@ -240,13 +232,6 @@ HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, return 1; } -InstructionCost HexagonTTIImpl::getGatherScatterOpCost( - unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, - Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const { - return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, CostKind, I); -} - InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -345,14 +330,16 @@ InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, } bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/, - unsigned /*AddressSpace*/) const { + unsigned /*AddressSpace*/, + TTI::MaskKind /*MaskKind*/) const { // This function is called from scalarize-masked-mem-intrin, which runs // in pre-isel. Use ST directly instead of calling isHVXVectorType. return HexagonMaskedVMem && ST.isTypeForHVX(DataType); } bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/, - unsigned /*AddressSpace*/) const { + unsigned /*AddressSpace*/, + TTI::MaskKind /*MaskKind*/) const { // This function is called from scalarize-masked-mem-intrin, which runs // in pre-isel. Use ST directly instead of calling isHVXVectorType. 
return HexagonMaskedVMem && ST.isTypeForHVX(DataType); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index cec2bf9..edf88cf 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -120,19 +120,10 @@ public: TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I = nullptr) const override; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) const override; - InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr) const override; - InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I) const override; InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -166,9 +157,10 @@ public: } bool isLegalMaskedStore(Type *DataType, Align Alignment, - unsigned AddressSpace) const override; - bool isLegalMaskedLoad(Type *DataType, Align Alignment, - unsigned AddressSpace) const override; + unsigned AddressSpace, + TTI::MaskKind MaskKind) const override; + bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace, + TTI::MaskKind MaskKind) const override; bool isLegalMaskedGather(Type *Ty, Align Alignment) const override; bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override; bool forceScalarizeMaskedGather(VectorType *VTy, diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index cb88d1a..d39b79a 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -653,7 +653,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, const MCInstrDesc& MCID = PacketMI.getDesc(); // First operand is always the result. - const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI); + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0); // Double regs can not feed into new value store: PRM section: 5.4.2.2. 
if (PacketRC == &Hexagon::DoubleRegsRegClass) return false; @@ -866,7 +866,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, return false; const MCInstrDesc& MCID = PI.getDesc(); - const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI); + const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0); if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass) return false; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 5c50ec2..2813b1d 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -120,10 +120,6 @@ public: size_t length(Value *Val) const; size_t length(Type *Ty) const; - Constant *getNullValue(Type *Ty) const; - Constant *getFullValue(Type *Ty) const; - Constant *getConstSplat(Type *Ty, int Val) const; - Value *simplify(Value *Val) const; Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start, @@ -368,8 +364,8 @@ private: const HexagonVectorCombine &HVC; }; -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; OS << "Type: " << *AI.ValTy << '\n'; @@ -379,8 +375,8 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n'; OS << "Main\n"; @@ -398,9 +394,8 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, - const AlignVectors::ByteSpan::Block &B) { +[[maybe_unused]] raw_ostream & +operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) { OS << "(self:" << B.Seg.Val << ')'; @@ -412,8 +407,8 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) OS << B << '\n'; @@ -683,8 +678,8 @@ auto AlignVectors::getMask(Value *Val) const -> Value * { Type *ValTy = getPayload(Val)->getType(); if (auto *VecTy = dyn_cast<VectorType>(ValTy)) - return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy))); - return HVC.getFullValue(HVC.getBoolTy()); + return Constant::getAllOnesValue(HVC.getBoolTy(HVC.length(VecTy))); + return Constant::getAllOnesValue(HVC.getBoolTy()); } auto AlignVectors::getPassThrough(Value *Val) const -> Value * { @@ -1123,7 +1118,7 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder, BasicBlock *BaseBlock = Builder.GetInsertBlock(); ByteSpan ASpan; - auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen)); + auto *True = Constant::getAllOnesValue(HVC.getBoolTy(ScLen)); auto *Undef = UndefValue::get(SecTy); // Created load does not have to be "Instruction" (e.g. "undef"). 
@@ -1350,7 +1345,7 @@ auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder, ByteSpan VSection = VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen); Value *Undef = UndefValue::get(SecTy); - Value *Zero = HVC.getNullValue(SecTy); + Value *Zero = Constant::getNullValue(SecTy); Value *AccumV = Undef; Value *AccumM = Zero; for (ByteSpan::Block &S : VSection) { @@ -2475,19 +2470,19 @@ Value *HvxIdioms::processVGather(Instruction &In) const { Dst->eraseFromParent(); } else if (Qual == HvxIdioms::LLVM_Scatter) { // Gather feeds directly into scatter. - LLVM_DEBUG({ - auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType()); - assert(DstInpTy && "Cannot handle no vector type for llvm.scatter"); - unsigned DstInpSize = HVC.getSizeOf(DstInpTy); - unsigned DstElements = HVC.length(DstInpTy); - auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType()); - assert(DstElemTy && "llvm.scatter needs vector of ptr argument"); - dbgs() << " Gather feeds into scatter\n Values to scatter : " - << *Dst->getOperand(0) << "\n"; - dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements - << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy - << ") Access alignment(" << *Dst->getOperand(2) << ")\n"; - }); + auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType()); + assert(DstInpTy && "Cannot handle no vector type for llvm.scatter"); + [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy); + [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy); + [[maybe_unused]] auto *DstElemTy = + cast<PointerType>(DstInpTy->getElementType()); + assert(DstElemTy && "llvm.scatter needs vector of ptr argument"); + LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : " + << *Dst->getOperand(0) << "\n"); + LLVM_DEBUG(dbgs() << " Dst type(" << *DstInpTy << ") elements(" + << DstElements << ") VecLen(" << DstInpSize << ") type(" + << *DstElemTy << ") Access alignment(" + << *Dst->getOperand(2) << ")\n"); // Address of source auto *Src = getPointer(IndexLoad); if (!Src) @@ -2700,11 +2695,11 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In, // Do full-precision multiply and shift. Value *Prod32 = createMul16(Builder, Op.X, Op.Y); if (Rounding) { - Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt); + Value *RoundVal = ConstantInt::get(Prod32->getType(), 1 << *Op.RoundAt); Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add"); } - Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac); + Value *ShiftAmt = ConstantInt::get(Prod32->getType(), Op.Frac); Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed ? Builder.CreateAShr(Prod32, ShiftAmt, "asr") : Builder.CreateLShr(Prod32, ShiftAmt, "lsr"); @@ -2723,10 +2718,10 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In, // Add the optional rounding to the proper word. if (Op.RoundAt.has_value()) { - Value *Zero = HVC.getNullValue(WordX[0]->getType()); + Value *Zero = Constant::getNullValue(WordX[0]->getType()); SmallVector<Value *> RoundV(WordP.size(), Zero); RoundV[*Op.RoundAt / 32] = - HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32)); + ConstantInt::get(HvxWordTy, 1 << (*Op.RoundAt % 32)); WordP = createAddLong(Builder, WordP, RoundV); } @@ -2734,7 +2729,7 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In, // Shift all products right by Op.Frac. 
unsigned SkipWords = Op.Frac / 32; - Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32); + Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32); for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) { int Src = Dst + SkipWords; @@ -2803,7 +2798,7 @@ auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y, } else { AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry); if (CarryIn == nullptr) - CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy))); + CarryIn = Constant::getNullValue(HVC.getBoolTy(HVC.length(VecTy))); Args.push_back(CarryIn); } Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry, @@ -2951,7 +2946,7 @@ auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX, } } - Value *Zero = HVC.getNullValue(WordX[0]->getType()); + Value *Zero = Constant::getNullValue(WordX[0]->getType()); auto pop_back_or_zero = [Zero](auto &Vector) -> Value * { if (Vector.empty()) @@ -3147,33 +3142,6 @@ auto HexagonVectorCombine::length(Type *Ty) const -> size_t { return VecTy->getElementCount().getFixedValue(); } -auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * { - assert(Ty->isIntOrIntVectorTy()); - auto Zero = ConstantInt::get(Ty->getScalarType(), 0); - if (auto *VecTy = dyn_cast<VectorType>(Ty)) - return ConstantVector::getSplat(VecTy->getElementCount(), Zero); - return Zero; -} - -auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * { - assert(Ty->isIntOrIntVectorTy()); - auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1); - if (auto *VecTy = dyn_cast<VectorType>(Ty)) - return ConstantVector::getSplat(VecTy->getElementCount(), Minus1); - return Minus1; -} - -auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const - -> Constant * { - assert(Ty->isVectorTy()); - auto VecTy = cast<VectorType>(Ty); - Type *ElemTy = VecTy->getElementType(); - // Add support for floats if needed. - auto *Splat = ConstantVector::getSplat(VecTy->getElementCount(), - ConstantInt::get(ElemTy, Val)); - return Splat; -} - auto HexagonVectorCombine::simplify(Value *V) const -> Value * { if (auto *In = dyn_cast<Instruction>(V)) { SimplifyQuery Q(DL, &TLI, &DT, &AC, In); @@ -3581,7 +3549,7 @@ auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder, // If there are too few, fill them with the sign bit. 
Value *Last = Inputs.back(); Value *Sign = Builder.CreateAShr( - Last, getConstSplat(Last->getType(), Width - 1), "asr"); + Last, ConstantInt::get(Last->getType(), Width - 1), "asr"); Inputs.resize(NeedInputs, Sign); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 1a0f1ab..5a187d2 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -569,9 +569,9 @@ public: return true; } - bool finishLayout(const MCAssembler &Asm) const override { + bool finishLayout() const override { SmallVector<MCFragment *> Frags; - for (MCSection &Sec : Asm) { + for (MCSection &Sec : *Asm) { Frags.clear(); for (MCFragment &F : Sec) Frags.push_back(&F); @@ -580,7 +580,7 @@ public: default: break; case MCFragment::FT_Align: { - auto Size = Asm.computeFragmentSize(*Frags[J]); + auto Size = Asm->computeFragmentSize(*Frags[J]); for (auto K = J; K != 0 && Size >= HEXAGON_PACKET_SIZE;) { --K; switch (Frags[K]->getKind()) { @@ -597,14 +597,14 @@ public: MCInst Inst = RF.getInst(); const bool WouldTraverseLabel = llvm::any_of( - Asm.symbols(), [&Asm, &RF, &Inst](MCSymbol const &sym) { + Asm->symbols(), [&RF, &Inst, Asm = Asm](MCSymbol const &sym) { uint64_t Offset = 0; - const bool HasOffset = Asm.getSymbolOffset(sym, Offset); + const bool HasOffset = Asm->getSymbolOffset(sym, Offset); const unsigned PacketSizeBytes = HexagonMCInstrInfo::bundleSize(Inst) * HEXAGON_INSTR_SIZE; const bool OffsetPastSym = - Offset <= (Asm.getFragmentOffset(RF) + PacketSizeBytes); + Offset <= Asm->getFragmentOffset(RF) + PacketSizeBytes; return !sym.isVariable() && Offset != 0 && HasOffset && OffsetPastSym; }); @@ -631,7 +631,7 @@ public: *RF.getSubtargetInfo(), Inst); //assert(!Error); (void)Error; - ReplaceInstruction(Asm.getEmitter(), RF, Inst); + ReplaceInstruction(Asm->getEmitter(), RF, Inst); Size = 0; // Only look back one instruction break; } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 9b6bc5a..0b2279b 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -385,7 +385,7 @@ bool HexagonMCChecker::checkSlots() { bool HexagonMCChecker::checkPredicates() { // Check for proper use of new predicate registers. for (const auto &I : NewPreds) { - unsigned P = I; + MCRegister P = I; if (!Defs.count(P) || LatePreds.count(P) || Defs.count(Hexagon::P3_0)) { // Error out if the new predicate register is not defined, @@ -398,7 +398,7 @@ bool HexagonMCChecker::checkPredicates() { // Check for proper use of auto-anded of predicate registers. for (const auto &I : LatePreds) { - unsigned P = I; + MCRegister P = I; if (LatePreds.count(P) > 1 || Defs.count(P)) { // Error out if predicate register defined "late" multiple times or @@ -607,7 +607,7 @@ void HexagonMCChecker::checkRegisterCurDefs() { bool HexagonMCChecker::checkRegisters() { // Check for proper register definitions. for (const auto &I : Defs) { - unsigned R = I.first; + MCRegister R = I.first; if (isLoopRegister(R) && Defs.count(R) > 1 && (HexagonMCInstrInfo::isInnerLoop(MCB) || @@ -620,8 +620,8 @@ bool HexagonMCChecker::checkRegisters() { if (SoftDefs.count(R)) { // Error out for explicit changes to registers also weakly defined // (e.g., "{ usr = r0; r0 = sfadd(...) }"). - unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:. 
- unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + MCRegister UsrR = Hexagon::USR; + MCRegister BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; reportErrorRegisters(BadR); return false; } @@ -633,8 +633,8 @@ bool HexagonMCChecker::checkRegisters() { if (PM.count(Unconditional)) { // Error out on an unconditional change when there are any other // changes, conditional or not. - unsigned UsrR = Hexagon::USR; - unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + MCRegister UsrR = Hexagon::USR; + MCRegister BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; reportErrorRegisters(BadR); return false; } @@ -664,7 +664,7 @@ bool HexagonMCChecker::checkRegisters() { // Check for use of temporary definitions. for (const auto &I : TmpDefs) { - unsigned R = I; + MCRegister R = I; if (!Uses.count(R)) { // special case for vhist @@ -765,12 +765,12 @@ void HexagonMCChecker::compoundRegisterMap(unsigned &Register) { } } -void HexagonMCChecker::reportErrorRegisters(unsigned Register) { +void HexagonMCChecker::reportErrorRegisters(MCRegister Register) { reportError("register `" + Twine(RI.getName(Register)) + "' modified more than once"); } -void HexagonMCChecker::reportErrorNewValue(unsigned Register) { +void HexagonMCChecker::reportErrorNewValue(MCRegister Register) { reportError("register `" + Twine(RI.getName(Register)) + "' used with `.new' " "but not validly modified in the same packet"); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index e9b87c5..8beee8d 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -39,41 +39,41 @@ class HexagonMCChecker { bool ReportErrors; /// Set of definitions: register #, if predicated, if predicated true. - using PredSense = std::pair<unsigned, bool>; + using PredSense = std::pair<MCRegister, bool>; static const PredSense Unconditional; using PredSet = std::multiset<PredSense>; using PredSetIterator = std::multiset<PredSense>::iterator; - using DefsIterator = DenseMap<unsigned, PredSet>::iterator; - DenseMap<unsigned, PredSet> Defs; + using DefsIterator = DenseMap<MCRegister, PredSet>::iterator; + DenseMap<MCRegister, PredSet> Defs; /// Set of weak definitions whose clashes should be enforced selectively. - using SoftDefsIterator = std::set<unsigned>::iterator; - std::set<unsigned> SoftDefs; + using SoftDefsIterator = std::set<MCRegister>::iterator; + std::set<MCRegister> SoftDefs; /// Set of temporary definitions not committed to the register file. - using TmpDefsIterator = std::set<unsigned>::iterator; - std::set<unsigned> TmpDefs; + using TmpDefsIterator = std::set<MCRegister>::iterator; + std::set<MCRegister> TmpDefs; /// Set of new predicates used. - using NewPredsIterator = std::set<unsigned>::iterator; - std::set<unsigned> NewPreds; + using NewPredsIterator = std::set<MCRegister>::iterator; + std::set<MCRegister> NewPreds; /// Set of predicates defined late. - using LatePredsIterator = std::multiset<unsigned>::iterator; - std::multiset<unsigned> LatePreds; + using LatePredsIterator = std::multiset<MCRegister>::iterator; + std::multiset<MCRegister> LatePreds; /// Set of uses. - using UsesIterator = std::set<unsigned>::iterator; - std::set<unsigned> Uses; + using UsesIterator = std::set<MCRegister>::iterator; + std::set<MCRegister> Uses; /// Pre-defined set of read-only registers. 
- using ReadOnlyIterator = std::set<unsigned>::iterator; - std::set<unsigned> ReadOnly; + using ReadOnlyIterator = std::set<MCRegister>::iterator; + std::set<MCRegister> ReadOnly; // Contains the vector-pair-registers with the even number // first ("v0:1", e.g.) used/def'd in this packet. - std::set<unsigned> ReversePairs; + std::set<MCRegister> ReversePairs; void init(); void init(MCInst const &); @@ -107,7 +107,7 @@ class HexagonMCChecker { static void compoundRegisterMap(unsigned &); - bool isLoopRegister(unsigned R) const { + bool isLoopRegister(MCRegister R) const { return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R || Hexagon::LC1 == R); } @@ -120,8 +120,8 @@ public: MCSubtargetInfo const &STI, bool CopyReportErrors); bool check(bool FullCheck = true); - void reportErrorRegisters(unsigned Register); - void reportErrorNewValue(unsigned Register); + void reportErrorRegisters(MCRegister Register); + void reportErrorNewValue(MCRegister Register); void reportError(SMLoc Loc, Twine const &Msg); void reportNote(SMLoc Loc, Twine const &Msg); void reportError(Twine const &Msg); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index c5e57d0..712bdbe 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -21,7 +21,6 @@ #include "llvm/TargetParser/SubtargetFeature.h" #include <cstddef> #include <cstdint> -#include <memory> namespace llvm { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 2f59b7c..10c350e 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -67,6 +67,11 @@ void HexagonMCELFStreamer::emitInstruction(const MCInst &MCB, assert(MCB.getOpcode() == Hexagon::BUNDLE); assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE); assert(HexagonMCInstrInfo::bundleSize(MCB) > 0); + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + HexagonMCChecker Check(getContext(), *MCII, STI, const_cast<MCInst &>(MCB), + *RI); + [[maybe_unused]] bool CheckOk = Check.check(false); + assert(CheckOk); // At this point, MCB is a bundle // Iterate through the bundle and assign addends for the instructions diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp index 3b1d3bd..4d0df66 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> -#include <utility> using namespace llvm; using namespace rdf; @@ -44,11 +43,11 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { const MachineOperand &Src = MI->getOperand(1); RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg()); RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg()); - assert(Register::isPhysicalRegister(DstR.Reg)); - assert(Register::isPhysicalRegister(SrcR.Reg)); + assert(DstR.asMCReg().isPhysical()); + assert(SrcR.asMCReg().isPhysical()); const TargetRegisterInfo &TRI = DFG.getTRI(); - if (TRI.getMinimalPhysRegClass(DstR.Reg) != - TRI.getMinimalPhysRegClass(SrcR.Reg)) + if (TRI.getMinimalPhysRegClass(DstR.asMCReg()) != + TRI.getMinimalPhysRegClass(SrcR.asMCReg())) return false; if (!DFG.isTracked(SrcR) || !DFG.isTracked(DstR)) return false; @@ -66,7 
+65,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) { Copies.push_back(SA.Id); for (auto I : EM) { - auto FS = DefM.find(I.second.Reg); + auto FS = DefM.find(I.second.Id); if (FS == DefM.end() || FS->second.empty()) continue; // Undefined source RDefMap[I.second][SA.Id] = FS->second.top()->Id; @@ -93,7 +92,7 @@ void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) { for (auto &R : RDefMap) { if (!RRs.count(R.first)) continue; - auto F = DefM.find(R.first.Reg); + auto F = DefM.find(R.first.Id); if (F == DefM.end() || F->second.empty()) continue; R.second[IA.Id] = F->second.top()->Id; @@ -155,16 +154,16 @@ bool CopyPropagation::run() { bool HasLimit = CpLimit.getNumOccurrences() > 0; #endif - auto MinPhysReg = [this] (RegisterRef RR) -> unsigned { + auto MinPhysReg = [this](RegisterRef RR) -> MCRegister { const TargetRegisterInfo &TRI = DFG.getTRI(); - const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg); + const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.asMCReg()); if ((RC.LaneMask & RR.Mask) == RC.LaneMask) - return RR.Reg; - for (MCSubRegIndexIterator S(RR.Reg, &TRI); S.isValid(); ++S) + return RR.asMCReg(); + for (MCSubRegIndexIterator S(RR.asMCReg(), &TRI); S.isValid(); ++S) if (RR.Mask == TRI.getSubRegIndexLaneMask(S.getSubRegIndex())) return S.getSubReg(); llvm_unreachable("Should have found a register"); - return 0; + return MCRegister(); }; const PhysicalRegisterInfo &PRI = DFG.getPRI(); @@ -215,7 +214,7 @@ bool CopyPropagation::run() { << *NodeAddr<StmtNode*>(IA).Addr->getCode(); } - unsigned NewReg = MinPhysReg(SR); + MCRegister NewReg = MinPhysReg(SR); Op.setReg(NewReg); Op.setSubReg(0); DFG.unlinkUse(UA, false); |
