Diffstat (limited to 'llvm/lib')
63 files changed, 3581 insertions, 1936 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 853bd66..a572eef 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1582,6 +1582,23 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
   return nullptr;
 }
 
+/// Returns the absolute value of \p A. In the context of dependence analysis,
+/// we need an absolute value in a mathematical sense. If \p A is the signed
+/// minimum value, we cannot represent it unless extending the original type.
+/// Thus if we cannot prove that \p A is not the signed minimum value, returns
+/// nullptr.
+static const SCEV *absSCEVNoSignedOverflow(const SCEV *A, ScalarEvolution &SE) {
+  IntegerType *Ty = dyn_cast<IntegerType>(A->getType());
+  if (!Ty)
+    return nullptr;
+
+  const SCEV *SMin =
+      SE.getConstant(APInt::getSignedMinValue(Ty->getBitWidth()));
+  if (!SE.isKnownPredicate(CmpInst::ICMP_NE, A, SMin))
+    return nullptr;
+  return SE.getAbsExpr(A, /*IsNSW=*/true);
+}
+
 /// Returns true iff \p Test is enabled.
 static bool isDependenceTestEnabled(DependenceTestType Test) {
   if (EnableDependenceTest == DependenceTestType::All)
@@ -1669,21 +1686,25 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
 
   // check that |Delta| < iteration count
-  if (const SCEV *UpperBound =
-          collectUpperBound(CurSrcLoop, Delta->getType())) {
+  bool IsDeltaLarge = [&] {
+    const SCEV *UpperBound = collectUpperBound(CurSrcLoop, Delta->getType());
+    if (!UpperBound)
+      return false;
+
     LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
     LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
-    const SCEV *AbsDelta =
-        SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
-    const SCEV *AbsCoeff =
-        SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+    const SCEV *AbsDelta = absSCEVNoSignedOverflow(Delta, *SE);
+    const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE);
+    if (!AbsDelta || !AbsCoeff)
+      return false;
     const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
-    if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
-      // Distance greater than trip count - no dependence
-      ++StrongSIVindependence;
-      ++StrongSIVsuccesses;
-      return true;
-    }
+    return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product);
+  }();
+  if (IsDeltaLarge) {
+    // Distance greater than trip count - no dependence
+    ++StrongSIVindependence;
+    ++StrongSIVsuccesses;
+    return true;
   }
 
   // Can we compute distance?
@@ -2259,6 +2280,9 @@ bool DependenceInfo::weakZeroSrcSIVtest(
   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
   if (!ConstCoeff)
     return false;
+
+  // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
+  // TODO: Bail out if it's a signed minimum value.
   const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff) ?
       SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
@@ -2369,6 +2393,9 @@ bool DependenceInfo::weakZeroDstSIVtest(
   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
   if (!ConstCoeff)
     return false;
+
+  // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
+  // TODO: Bail out if it's a signed minimum value.
   const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff) ?
SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9e78ec9..8ea1326 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4030,7 +4030,6 @@ bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { /// if it is simplified. class SimplificationTracker { DenseMap<Value *, Value *> Storage; - const SimplifyQuery &SQ; // Tracks newly created Phi nodes. The elements are iterated by insertion // order. PhiNodeSet AllPhiNodes; @@ -4038,8 +4037,6 @@ class SimplificationTracker { SmallPtrSet<SelectInst *, 32> AllSelectNodes; public: - SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} - Value *Get(Value *V) { do { auto SV = Storage.find(V); @@ -4049,30 +4046,6 @@ public: } while (true); } - Value *Simplify(Value *Val) { - SmallVector<Value *, 32> WorkList; - SmallPtrSet<Value *, 32> Visited; - WorkList.push_back(Val); - while (!WorkList.empty()) { - auto *P = WorkList.pop_back_val(); - if (!Visited.insert(P).second) - continue; - if (auto *PI = dyn_cast<Instruction>(P)) - if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) { - for (auto *U : PI->users()) - WorkList.push_back(cast<Value>(U)); - Put(PI, V); - PI->replaceAllUsesWith(V); - if (auto *PHI = dyn_cast<PHINode>(PI)) - AllPhiNodes.erase(PHI); - if (auto *Select = dyn_cast<SelectInst>(PI)) - AllSelectNodes.erase(Select); - PI->eraseFromParent(); - } - } - return Get(Val); - } - void Put(Value *From, Value *To) { Storage.insert({From, To}); } void ReplacePhi(PHINode *From, PHINode *To) { @@ -4133,8 +4106,7 @@ private: /// Common Type for all different fields in addressing modes. Type *CommonType = nullptr; - /// SimplifyQuery for simplifyInstruction utility. - const SimplifyQuery &SQ; + const DataLayout &DL; /// Original Address. Value *Original; @@ -4143,8 +4115,8 @@ private: Value *CommonValue = nullptr; public: - AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) - : SQ(_SQ), Original(OriginalValue) {} + AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue) + : DL(DL), Original(OriginalValue) {} ~AddressingModeCombiner() { eraseCommonValueIfDead(); } @@ -4256,7 +4228,7 @@ private: // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. SmallVector<Value *, 2> NullValue; - Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); + Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); for (auto &AM : AddrModes) { Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { @@ -4306,7 +4278,7 @@ private: // simplification is possible only if original phi/selects were not // simplified yet. // Using this mapping we can find the current value in AddrToBase. - SimplificationTracker ST(SQ); + SimplificationTracker ST; // First step, DFS to create PHI nodes for all intermediate blocks. // Also fill traverse order for the second step. @@ -4465,7 +4437,6 @@ private: PHI->addIncoming(ST.Get(Map[PV]), B); } } - Map[Current] = ST.Simplify(V); } } @@ -5856,8 +5827,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the graph are compatible. 
 bool PhiOrSelectSeen = false;
   SmallVector<Instruction *, 16> AddrModeInsts;
-  const SimplifyQuery SQ(*DL, TLInfo);
-  AddressingModeCombiner AddrModes(SQ, Addr);
+  AddressingModeCombiner AddrModes(*DL, Addr);
   TypePromotionTransaction TPT(RemovedInsts);
   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
       TPT.getRestorationPoint();
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f9d27b0..178529f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
   case G_FMINIMUMNUM:
   case G_FMAXIMUMNUM:
     return lowerFMinNumMaxNum(MI);
+  case G_FMINIMUM:
+  case G_FMAXIMUM:
+    return lowerFMinimumMaximum(MI);
   case G_MERGE_VALUES:
     return lowerMergeValues(MI);
   case G_UNMERGE_VALUES:
@@ -8777,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
   return Legalized;
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
+  LLT Ty = MRI.getType(Dst);
+  LLT CmpTy = Ty.changeElementSize(1);
+
+  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
+  unsigned OpcIeee =
+      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
+  unsigned OpcNonIeee =
+      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
+  bool MinMaxMustRespectOrderedZero = false;
+  Register Res;
+
+  // IEEE variants don't need canonicalization
+  if (LI.isLegalOrCustom({OpcIeee, Ty})) {
+    Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
+    MinMaxMustRespectOrderedZero = true;
+  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
+    Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
+  } else {
+    auto Compare = MIRBuilder.buildFCmp(
+        IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
+    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
+  }
+
+  // Propagate any NaN of both operands
+  if (!MI.getFlag(MachineInstr::FmNoNans) &&
+      (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
+    auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
+
+    LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
+    APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
+    Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
+    if (Ty.isVector())
+      NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
+
+    Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
+  }
+
+  // fminimum/fmaximum requires -0.0 less than +0.0
+  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
+    GISelValueTracking VT(MIRBuilder.getMF());
+    KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
+    KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
+
+    if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
+      const unsigned Flags = MI.getFlags();
+      Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
+      auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
+
+      unsigned TestClass = IsMax ?
fcPosZero : fcNegZero; + + auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass); + auto LHSSelect = + MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags); + + auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass); + auto RHSSelect = + MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags); + + Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0); + } + } + + MIRBuilder.buildCopy(Dst, Res); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c Register DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72b364c..697b779 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -211,7 +211,7 @@ private: unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); } }; - using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>; + using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>; /// This map contains entries for each virtual register that is currently /// available in a physical register. LiveRegMap LiveVirtRegs; diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 8d413a3..d029ac5 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -2901,13 +2901,23 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, for (auto &SN : ER.Ready) IL_collectQueries( - EQ.Updated, SN->defs(), + EQ.Completed, SN->defs(), [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); }, [](AsynchronousSymbolQuery &Q, JITDylib &JD, NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) { Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol()); }); + // std::erase_if is not available in C++17, and llvm::erase_if does not work + // here. + for (auto it = EQ.Completed.begin(), end = EQ.Completed.end(); it != end;) { + if ((*it)->isComplete()) { + ++it; + } else { + it = EQ.Completed.erase(it); + } + } + #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_emit"); #endif @@ -3043,9 +3053,8 @@ Error ExecutionSession::OL_notifyEmitted( } } - for (auto &UQ : EmitQueries->Updated) - if (UQ->isComplete()) - UQ->handleComplete(*this); + for (auto &UQ : EmitQueries->Completed) + UQ->handleComplete(*this); // If there are any bad dependencies then return an error. 
if (!BadDeps.empty()) { diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt index ca8192b..9275586 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt @@ -16,11 +16,9 @@ add_llvm_component_library(LLVMOrcTargetProcess ExecutorSharedMemoryMapperService.cpp DefaultHostBootstrapValues.cpp ExecutorResolver.cpp - LibraryResolver.cpp JITLoaderGDB.cpp JITLoaderPerf.cpp JITLoaderVTune.cpp - LibraryScanner.cpp OrcRTBootstrap.cpp RegisterEHFrames.cpp SimpleExecutorDylibManager.cpp @@ -38,8 +36,6 @@ add_llvm_component_library(LLVMOrcTargetProcess LINK_COMPONENTS ${intel_jit_profiling} - BinaryFormat - Object OrcShared Support TargetParser diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp deleted file mode 100644 index 9d25b74..0000000 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp +++ /dev/null @@ -1,369 +0,0 @@ -//===- LibraryResolver.cpp - Library Resolution of Unresolved Symbols ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Library resolution impl for unresolved symbols -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h" -#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h" - -#include "llvm/ADT/StringSet.h" - -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Object/COFF.h" -#include "llvm/Object/ELF.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/MachO.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Error.h" - -#include <mutex> -#include <thread> - -#define DEBUG_TYPE "orc-resolver" - -namespace llvm::orc { - -LibraryResolver::LibraryResolver(const LibraryResolver::Setup &S) - : LibPathCache(S.Cache ? S.Cache : std::make_shared<LibraryPathCache>()), - LibPathResolver(S.PResolver - ? S.PResolver - : std::make_shared<PathResolver>(LibPathCache)), - ScanHelper(S.BasePaths, LibPathCache, LibPathResolver), - FB(S.FilterBuilder), LibMgr(), - ShouldScanCall(S.ShouldScanCall ? 
S.ShouldScanCall - : [](StringRef) -> bool { return true; }), - scanBatchSize(S.ScanBatchSize) { - - if (ScanHelper.getAllUnits().empty()) { - LLVM_DEBUG(dbgs() << "Warning: No base paths provided for scanning.\n"); - } -} - -std::unique_ptr<LibraryResolutionDriver> -LibraryResolutionDriver::create(const LibraryResolver::Setup &S) { - auto LR = std::make_unique<LibraryResolver>(S); - return std::unique_ptr<LibraryResolutionDriver>( - new LibraryResolutionDriver(std::move(LR))); -} - -void LibraryResolutionDriver::addScanPath(const std::string &Path, PathType K) { - LR->ScanHelper.addBasePath(Path, K); -} - -bool LibraryResolutionDriver::markLibraryLoaded(StringRef Path) { - auto Lib = LR->LibMgr.getLibrary(Path); - if (!Lib) - return false; - - Lib->setState(LibraryManager::LibState::Loaded); - - return true; -} - -bool LibraryResolutionDriver::markLibraryUnLoaded(StringRef Path) { - auto Lib = LR->LibMgr.getLibrary(Path); - if (!Lib) - return false; - - Lib->setState(LibraryManager::LibState::Unloaded); - - return true; -} - -void LibraryResolutionDriver::resolveSymbols( - std::vector<std::string> Syms, - LibraryResolver::OnSearchComplete OnCompletion, - const SearchConfig &Config) { - LR->searchSymbolsInLibraries(Syms, std::move(OnCompletion), Config); -} - -static bool shouldIgnoreSymbol(const object::SymbolRef &Sym, - uint32_t IgnoreFlags) { - Expected<uint32_t> FlagsOrErr = Sym.getFlags(); - if (!FlagsOrErr) { - consumeError(FlagsOrErr.takeError()); - return true; - } - - uint32_t Flags = *FlagsOrErr; - - using Filter = SymbolEnumeratorOptions; - if ((IgnoreFlags & Filter::IgnoreUndefined) && - (Flags & object::SymbolRef::SF_Undefined)) - return true; - if ((IgnoreFlags & Filter::IgnoreIndirect) && - (Flags & object::SymbolRef::SF_Indirect)) - return true; - if ((IgnoreFlags & Filter::IgnoreWeak) && - (Flags & object::SymbolRef::SF_Weak)) - return true; - - return false; -} - -bool SymbolEnumerator::enumerateSymbols(StringRef Path, OnEachSymbolFn OnEach, - const SymbolEnumeratorOptions &Opts) { - if (Path.empty()) - return false; - - ObjectFileLoader ObjLoader(Path); - - auto ObjOrErr = ObjLoader.getObjectFile(); - if (!ObjOrErr) { - std::string ErrMsg; - handleAllErrors(ObjOrErr.takeError(), - [&](const ErrorInfoBase &EIB) { ErrMsg = EIB.message(); }); - LLVM_DEBUG(dbgs() << "Failed loading object file: " << Path - << "\nError: " << ErrMsg << "\n"); - return false; - } - - object::ObjectFile *Obj = &ObjOrErr.get(); - - auto processSymbolRange = - [&](object::ObjectFile::symbol_iterator_range Range) -> EnumerateResult { - for (const auto &Sym : Range) { - if (shouldIgnoreSymbol(Sym, Opts.FilterFlags)) - continue; - - auto NameOrErr = Sym.getName(); - if (!NameOrErr) { - consumeError(NameOrErr.takeError()); - continue; - } - - StringRef Name = *NameOrErr; - if (Name.empty()) - continue; - - EnumerateResult Res = OnEach(Name); - if (Res != EnumerateResult::Continue) - return Res; - } - return EnumerateResult::Continue; - }; - - EnumerateResult Res = processSymbolRange(Obj->symbols()); - if (Res != EnumerateResult::Continue) - return Res == EnumerateResult::Stop; - - if (Obj->isELF()) { - const auto *ElfObj = cast<object::ELFObjectFileBase>(Obj); - Res = processSymbolRange(ElfObj->getDynamicSymbolIterators()); - if (Res != EnumerateResult::Continue) - return Res == EnumerateResult::Stop; - } else if (Obj->isCOFF()) { - const auto *CoffObj = cast<object::COFFObjectFile>(Obj); - for (auto I = CoffObj->export_directory_begin(), - E = CoffObj->export_directory_end(); - I != E; ++I) { - 
StringRef Name; - if (I->getSymbolName(Name)) - continue; - if (Name.empty()) - continue; - - if (OnEach(Name) != EnumerateResult::Continue) - return false; - } - } else if (Obj->isMachO()) { - } - - return true; -} - -class SymbolSearchContext { -public: - SymbolSearchContext(SymbolQuery &Q) : Q(Q) {} - - bool hasSearched(LibraryInfo *Lib) const { return Searched.count(Lib); } - - void markSearched(LibraryInfo *Lib) { Searched.insert(Lib); } - - inline bool allResolved() const { return Q.allResolved(); } - - SymbolQuery &query() { return Q; } - -private: - SymbolQuery &Q; - DenseSet<LibraryInfo *> Searched; -}; - -void LibraryResolver::resolveSymbolsInLibrary( - LibraryInfo &Lib, SymbolQuery &UnresolvedSymbols, - const SymbolEnumeratorOptions &Opts) { - LLVM_DEBUG(dbgs() << "Checking unresolved symbols " - << " in library : " << Lib.getFileName() << "\n";); - StringSet<> DiscoveredSymbols; - - if (!UnresolvedSymbols.hasUnresolved()) { - LLVM_DEBUG(dbgs() << "Skipping library: " << Lib.getFullPath() - << " — unresolved symbols exist.\n";); - return; - } - - bool HasEnumerated = false; - auto enumerateSymbolsIfNeeded = [&]() { - if (HasEnumerated) - return; - - HasEnumerated = true; - - LLVM_DEBUG(dbgs() << "Enumerating symbols in library: " << Lib.getFullPath() - << "\n";); - SymbolEnumerator::enumerateSymbols( - Lib.getFullPath(), - [&](StringRef sym) { - DiscoveredSymbols.insert(sym); - return EnumerateResult::Continue; - }, - Opts); - - if (DiscoveredSymbols.empty()) { - LLVM_DEBUG(dbgs() << " No symbols and remove library : " - << Lib.getFullPath() << "\n";); - LibMgr.removeLibrary(Lib.getFullPath()); - return; - } - }; - - if (!Lib.hasFilter()) { - LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath() - << "\n";); - enumerateSymbolsIfNeeded(); - SmallVector<StringRef> SymbolVec; - SymbolVec.reserve(DiscoveredSymbols.size()); - for (const auto &KV : DiscoveredSymbols) - SymbolVec.push_back(KV.first()); - - Lib.ensureFilterBuilt(FB, SymbolVec); - LLVM_DEBUG({ - dbgs() << "DiscoveredSymbols : " << DiscoveredSymbols.size() << "\n"; - for (const auto &KV : DiscoveredSymbols) - dbgs() << "DiscoveredSymbols : " << KV.first() << "\n"; - }); - } - - const auto &Unresolved = UnresolvedSymbols.getUnresolvedSymbols(); - bool HadAnySym = false; - LLVM_DEBUG(dbgs() << "Total unresolved symbols : " << Unresolved.size() - << "\n";); - for (const auto &Sym : Unresolved) { - if (Lib.mayContain(Sym)) { - LLVM_DEBUG(dbgs() << "Checking symbol '" << Sym - << "' in library: " << Lib.getFullPath() << "\n";); - enumerateSymbolsIfNeeded(); - if (DiscoveredSymbols.count(Sym) > 0) { - LLVM_DEBUG(dbgs() << " Resolved symbol: " << Sym - << " in library: " << Lib.getFullPath() << "\n";); - UnresolvedSymbols.resolve(Sym, Lib.getFullPath()); - HadAnySym = true; - } - } - } - - using LibraryState = LibraryManager::LibState; - if (HadAnySym && Lib.getState() != LibraryState::Loaded) - Lib.setState(LibraryState::Queried); -} - -void LibraryResolver::searchSymbolsInLibraries( - std::vector<std::string> &SymbolList, OnSearchComplete OnComplete, - const SearchConfig &Config) { - SymbolQuery Q(SymbolList); - - using LibraryState = LibraryManager::LibState; - using LibraryType = PathType; - auto tryResolveFrom = [&](LibraryState S, LibraryType K) { - LLVM_DEBUG(dbgs() << "Trying resolve from state=" << static_cast<int>(S) - << " type=" << static_cast<int>(K) << "\n";); - - SymbolSearchContext Ctx(Q); - while (!Ctx.allResolved()) { - - for (auto &Lib : LibMgr.getView(S, K)) { - if 
(Ctx.hasSearched(Lib.get())) - continue; - - // can use Async here? - resolveSymbolsInLibrary(*Lib, Ctx.query(), Config.Options); - Ctx.markSearched(Lib.get()); - - if (Ctx.allResolved()) - return; - } - - if (Ctx.allResolved()) - return; - - if (!scanLibrariesIfNeeded(K, scanBatchSize)) - break; // no more new libs to scan - } - }; - - for (const auto &[St, Ty] : Config.Policy.Plan) { - tryResolveFrom(St, Ty); - if (Q.allResolved()) - break; - } - - // done: - LLVM_DEBUG({ - dbgs() << "Search complete.\n"; - for (const auto &r : Q.getAllResults()) - dbgs() << "Resolved Symbol:" << r->Name << " -> " << r->ResolvedLibPath - << "\n"; - }); - - OnComplete(Q); -} - -bool LibraryResolver::scanLibrariesIfNeeded(PathType PK, size_t BatchSize) { - LLVM_DEBUG(dbgs() << "LibraryResolver::scanLibrariesIfNeeded: Scanning for " - << (PK == PathType::User ? "User" : "System") - << " libraries\n";); - if (!ScanHelper.leftToScan(PK)) - return false; - - LibraryScanner Scanner(ScanHelper, LibMgr, ShouldScanCall); - Scanner.scanNext(PK, BatchSize); - return true; -} - -bool LibraryResolver::symbolExistsInLibrary(const LibraryInfo &Lib, - StringRef SymName, - std::vector<std::string> *AllSyms) { - SymbolEnumeratorOptions Opts; - return symbolExistsInLibrary(Lib, SymName, AllSyms, Opts); -} - -bool LibraryResolver::symbolExistsInLibrary( - const LibraryInfo &Lib, StringRef SymName, - std::vector<std::string> *AllSyms, const SymbolEnumeratorOptions &Opts) { - bool Found = false; - - SymbolEnumerator::enumerateSymbols( - Lib.getFullPath(), - [&](StringRef Sym) { - if (AllSyms) - AllSyms->emplace_back(Sym.str()); - - if (Sym == SymName) { - Found = true; - } - - return EnumerateResult::Continue; - }, - Opts); - - return Found; -} - -} // end namespace llvm::orc diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp deleted file mode 100644 index f1e8b5d..0000000 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp +++ /dev/null @@ -1,1161 +0,0 @@ -//===- LibraryScanner.cpp - Provide Library Scanning Implementation ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h" -#include "llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h" - -#include "llvm/ADT/StringExtras.h" -#include "llvm/Object/COFF.h" -#include "llvm/Object/ELF.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/ELFTypes.h" -#include "llvm/Object/MachO.h" -#include "llvm/Object/MachOUniversal.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Program.h" -#include "llvm/TargetParser/Host.h" -#include "llvm/TargetParser/Triple.h" - -#ifdef LLVM_ON_UNIX -#include <sys/stat.h> -#include <unistd.h> -#endif // LLVM_ON_UNIX - -#ifdef __APPLE__ -#include <sys/stat.h> -#undef LC_LOAD_DYLIB -#undef LC_RPATH -#endif // __APPLE__ - -#define DEBUG_TYPE "orc-scanner" - -namespace llvm::orc { - -void handleError(Error Err, StringRef context = "") { - consumeError(handleErrors(std::move(Err), [&](const ErrorInfoBase &EIB) { - dbgs() << "LLVM Error"; - if (!context.empty()) - dbgs() << " [" << context << "]"; - dbgs() << ": " << EIB.message() << "\n"; - })); -} - -bool ObjectFileLoader::isArchitectureCompatible(const object::ObjectFile &Obj) { - Triple HostTriple(sys::getDefaultTargetTriple()); - Triple ObjTriple = Obj.makeTriple(); - - LLVM_DEBUG({ - dbgs() << "Host triple: " << HostTriple.str() - << ", Object triple: " << ObjTriple.str() << "\n"; - }); - - if (ObjTriple.getArch() != Triple::UnknownArch && - HostTriple.getArch() != ObjTriple.getArch()) - return false; - - if (ObjTriple.getOS() != Triple::UnknownOS && - HostTriple.getOS() != ObjTriple.getOS()) - return false; - - if (ObjTriple.getEnvironment() != Triple::UnknownEnvironment && - HostTriple.getEnvironment() != Triple::UnknownEnvironment && - HostTriple.getEnvironment() != ObjTriple.getEnvironment()) - return false; - - return true; -} - -Expected<object::OwningBinary<object::ObjectFile>> -ObjectFileLoader::loadObjectFileWithOwnership(StringRef FilePath) { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Attempting to open file " << FilePath - << "\n";); - auto BinOrErr = object::createBinary(FilePath); - if (!BinOrErr) { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Failed to open file " << FilePath - << "\n";); - return BinOrErr.takeError(); - } - - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Successfully opened file " << FilePath - << "\n";); - - auto OwningBin = BinOrErr->takeBinary(); - object::Binary *Bin = OwningBin.first.get(); - - if (Bin->isArchive()) { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: File is an archive, not supported: " - << FilePath << "\n";); - return createStringError(std::errc::invalid_argument, - "Archive files are not supported: %s", - FilePath.str().c_str()); - } - -#if defined(__APPLE__) - if (auto *UB = dyn_cast<object::MachOUniversalBinary>(Bin)) { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Detected Mach-O universal binary: " - << FilePath << "\n";); - for (auto ObjForArch : UB->objects()) { - auto ObjOrErr = ObjForArch.getAsObjectFile(); - if (!ObjOrErr) { - LLVM_DEBUG( - dbgs() - << "ObjectFileLoader: Skipping invalid architecture slice\n";); - - consumeError(ObjOrErr.takeError()); - continue; - } - - std::unique_ptr<object::ObjectFile> Obj = std::move(ObjOrErr.get()); - if (isArchitectureCompatible(*Obj)) { - LLVM_DEBUG( - dbgs() << 
"ObjectFileLoader: Found compatible object slice\n";); - - return object::OwningBinary<object::ObjectFile>( - std::move(Obj), std::move(OwningBin.second)); - - } else { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Incompatible architecture " - "slice skipped\n";); - } - } - LLVM_DEBUG(dbgs() << "ObjectFileLoader: No compatible slices found in " - "universal binary\n";); - return createStringError(inconvertibleErrorCode(), - "No compatible object found in fat binary: %s", - FilePath.str().c_str()); - } -#endif - - auto ObjOrErr = - object::ObjectFile::createObjectFile(Bin->getMemoryBufferRef()); - if (!ObjOrErr) { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Failed to create object file\n";); - return ObjOrErr.takeError(); - } - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Detected object file\n";); - - std::unique_ptr<object::ObjectFile> Obj = std::move(*ObjOrErr); - if (!isArchitectureCompatible(*Obj)) { - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Incompatible architecture: " - << FilePath << "\n";); - return createStringError(inconvertibleErrorCode(), - "Incompatible object file: %s", - FilePath.str().c_str()); - } - - LLVM_DEBUG(dbgs() << "ObjectFileLoader: Object file is compatible\n";); - - return object::OwningBinary<object::ObjectFile>(std::move(Obj), - std::move(OwningBin.second)); -} - -template <class ELFT> -bool isELFSharedLibrary(const object::ELFFile<ELFT> &ELFObj) { - if (ELFObj.getHeader().e_type != ELF::ET_DYN) - return false; - - auto PHOrErr = ELFObj.program_headers(); - if (!PHOrErr) { - consumeError(PHOrErr.takeError()); - return true; - } - - for (auto Phdr : *PHOrErr) { - if (Phdr.p_type == ELF::PT_INTERP) - return false; - } - - return true; -} - -bool isSharedLibraryObject(object::ObjectFile &Obj) { - if (Obj.isELF()) { - if (auto *ELF32LE = dyn_cast<object::ELF32LEObjectFile>(&Obj)) - return isELFSharedLibrary(ELF32LE->getELFFile()); - if (auto *ELF64LE = dyn_cast<object::ELF64LEObjectFile>(&Obj)) - return isELFSharedLibrary(ELF64LE->getELFFile()); - if (auto *ELF32BE = dyn_cast<object::ELF32BEObjectFile>(&Obj)) - return isELFSharedLibrary(ELF32BE->getELFFile()); - if (auto *ELF64BE = dyn_cast<object::ELF64BEObjectFile>(&Obj)) - return isELFSharedLibrary(ELF64BE->getELFFile()); - } else if (Obj.isMachO()) { - const object::MachOObjectFile *MachO = - dyn_cast<object::MachOObjectFile>(&Obj); - if (!MachO) { - LLVM_DEBUG(dbgs() << "Failed to cast to MachOObjectFile.\n";); - return false; - } - LLVM_DEBUG({ - bool Result = - MachO->getHeader().filetype == MachO::HeaderFileType::MH_DYLIB; - dbgs() << "Mach-O filetype: " << MachO->getHeader().filetype - << " (MH_DYLIB == " << MachO::HeaderFileType::MH_DYLIB - << "), shared: " << Result << "\n"; - }); - - return MachO->getHeader().filetype == MachO::HeaderFileType::MH_DYLIB; - } else if (Obj.isCOFF()) { - const object::COFFObjectFile *coff = dyn_cast<object::COFFObjectFile>(&Obj); - if (!coff) - return false; - return coff->getCharacteristics() & COFF::IMAGE_FILE_DLL; - } else { - LLVM_DEBUG(dbgs() << "Binary is not an ObjectFile.\n";); - } - - return false; -} - -bool DylibPathValidator::isSharedLibrary(StringRef Path) { - LLVM_DEBUG(dbgs() << "Checking if path is a shared library: " << Path - << "\n";); - - auto FileType = sys::fs::get_file_type(Path, /*Follow*/ true); - if (FileType != sys::fs::file_type::regular_file) { - LLVM_DEBUG(dbgs() << "File type is not a regular file for path: " << Path - << "\n";); - return false; - } - - file_magic MagicCode; - identify_magic(Path, MagicCode); - - // Skip archives. 
- if (MagicCode == file_magic::archive) - return false; - - // Universal binary handling. -#if defined(__APPLE__) - if (MagicCode == file_magic::macho_universal_binary) { - ObjectFileLoader ObjLoader(Path); - auto ObjOrErr = ObjLoader.getObjectFile(); - if (!ObjOrErr) { - consumeError(ObjOrErr.takeError()); - return false; - } - return isSharedLibraryObject(ObjOrErr.get()); - } -#endif - - // Object file inspection for PE/COFF, ELF, and Mach-O - bool NeedsObjectInspection = -#if defined(_WIN32) - (MagicCode == file_magic::pecoff_executable); -#elif defined(__APPLE__) - (MagicCode == file_magic::macho_fixed_virtual_memory_shared_lib || - MagicCode == file_magic::macho_dynamically_linked_shared_lib || - MagicCode == file_magic::macho_dynamically_linked_shared_lib_stub); -#elif defined(LLVM_ON_UNIX) -#ifdef __CYGWIN__ - (MagicCode == file_magic::pecoff_executable); -#else - (MagicCode == file_magic::elf_shared_object); -#endif -#else -#error "Unsupported platform." -#endif - - if (NeedsObjectInspection) { - ObjectFileLoader ObjLoader(Path); - auto ObjOrErr = ObjLoader.getObjectFile(); - if (!ObjOrErr) { - consumeError(ObjOrErr.takeError()); - return false; - } - return isSharedLibraryObject(ObjOrErr.get()); - } - - LLVM_DEBUG(dbgs() << "Path is not identified as a shared library: " << Path - << "\n";); - return false; -} - -void DylibSubstitutor::configure(StringRef LoaderPath) { - SmallString<512> ExecPath(sys::fs::getMainExecutable(nullptr, nullptr)); - sys::path::remove_filename(ExecPath); - - SmallString<512> LoaderDir; - if (LoaderPath.empty()) { - LoaderDir = ExecPath; - } else { - LoaderDir = LoaderPath.str(); - if (!sys::fs::is_directory(LoaderPath)) - sys::path::remove_filename(LoaderDir); - } - -#ifdef __APPLE__ - Placeholders["@loader_path"] = std::string(LoaderDir); - Placeholders["@executable_path"] = std::string(ExecPath); -#else - Placeholders["$origin"] = std::string(LoaderDir); -#endif -} - -std::optional<std::string> -SearchPathResolver::resolve(StringRef Stem, const DylibSubstitutor &Subst, - DylibPathValidator &Validator) const { - for (const auto &SP : Paths) { - std::string Base = Subst.substitute(SP); - - SmallString<512> FullPath(Base); - if (!PlaceholderPrefix.empty() && - Stem.starts_with_insensitive(PlaceholderPrefix)) - FullPath.append(Stem.drop_front(PlaceholderPrefix.size())); - else - sys::path::append(FullPath, Stem); - - LLVM_DEBUG(dbgs() << "SearchPathResolver::resolve FullPath = " << FullPath - << "\n";); - - if (auto Valid = Validator.validate(FullPath.str())) - return Valid; - } - - return std::nullopt; -} - -std::optional<std::string> -DylibResolverImpl::tryWithExtensions(StringRef LibStem) const { - LLVM_DEBUG(dbgs() << "tryWithExtensions: baseName = " << LibStem << "\n";); - SmallVector<SmallString<256>, 8> Candidates; - - // Add extensions by platform -#if defined(__APPLE__) - Candidates.emplace_back(LibStem); - Candidates.back() += ".dylib"; -#elif defined(_WIN32) - Candidates.emplace_back(LibStem); - Candidates.back() += ".dll"; -#else - Candidates.emplace_back(LibStem); - Candidates.back() += ".so"; -#endif - - // Optionally try "lib" prefix if not already there - StringRef FileName = sys::path::filename(LibStem); - StringRef Base = sys::path::parent_path(LibStem); - if (!FileName.starts_with("lib")) { - SmallString<256> WithPrefix(Base); - if (!WithPrefix.empty()) - sys::path::append(WithPrefix, ""); // ensure separator if needed - WithPrefix += "lib"; - WithPrefix += FileName; - -#if defined(__APPLE__) - WithPrefix += ".dylib"; -#elif 
defined(_WIN32) - WithPrefix += ".dll"; -#else - WithPrefix += ".so"; -#endif - - Candidates.push_back(std::move(WithPrefix)); - } - - LLVM_DEBUG({ - dbgs() << " Candidates to try:\n"; - for (const auto &C : Candidates) - dbgs() << " " << C << "\n"; - }); - - // Try all variants using tryAllPaths - for (const auto &Name : Candidates) { - - LLVM_DEBUG(dbgs() << " Trying candidate: " << Name << "\n";); - - for (const auto &R : Resolvers) { - if (auto Res = R.resolve(Name, Substitutor, Validator)) - return Res; - } - } - - LLVM_DEBUG(dbgs() << " -> No candidate Resolved.\n";); - - return std::nullopt; -} - -std::optional<std::string> -DylibResolverImpl::resolve(StringRef LibStem, bool VariateLibStem) const { - LLVM_DEBUG(dbgs() << "Resolving library stem: " << LibStem << "\n";); - - // If it is an absolute path, don't try iterate over the paths. - if (sys::path::is_absolute(LibStem)) { - LLVM_DEBUG(dbgs() << " -> Absolute path detected.\n";); - return Validator.validate(LibStem); - } - - if (!LibStem.starts_with_insensitive("@rpath")) { - if (auto norm = Validator.validate(Substitutor.substitute(LibStem))) { - LLVM_DEBUG(dbgs() << " -> Resolved after substitution: " << *norm - << "\n";); - - return norm; - } - } - - for (const auto &R : Resolvers) { - LLVM_DEBUG(dbgs() << " -> Resolving via search path ... \n";); - if (auto Result = R.resolve(LibStem, Substitutor, Validator)) { - LLVM_DEBUG(dbgs() << " -> Resolved via search path: " << *Result - << "\n";); - - return Result; - } - } - - // Expand libStem with paths, extensions, etc. - // std::string foundName; - if (VariateLibStem) { - LLVM_DEBUG(dbgs() << " -> Trying with extensions...\n";); - - if (auto Norm = tryWithExtensions(LibStem)) { - LLVM_DEBUG(dbgs() << " -> Resolved via tryWithExtensions: " << *Norm - << "\n";); - - return Norm; - } - } - - LLVM_DEBUG(dbgs() << " -> Could not resolve: " << LibStem << "\n";); - - return std::nullopt; -} - -#ifndef _WIN32 -mode_t PathResolver::lstatCached(StringRef Path) { - // If already cached - retun cached result - if (auto Cache = LibPathCache->read_lstat(Path)) - return *Cache; - - // Not cached: perform lstat and store - struct stat buf{}; - mode_t st_mode = (lstat(Path.str().c_str(), &buf) == -1) ? 
0 : buf.st_mode; - - LibPathCache->insert_lstat(Path, st_mode); - - return st_mode; -} - -std::optional<std::string> PathResolver::readlinkCached(StringRef Path) { - // If already cached - retun cached result - if (auto Cache = LibPathCache->read_link(Path)) - return Cache; - - // If result not in cache - call system function and cache result - char buf[PATH_MAX]; - ssize_t len; - if ((len = readlink(Path.str().c_str(), buf, sizeof(buf))) != -1) { - buf[len] = '\0'; - std::string s(buf); - LibPathCache->insert_link(Path, s); - return s; - } - return std::nullopt; -} - -void createComponent(StringRef Path, StringRef BasePath, bool BaseIsResolved, - SmallVector<StringRef, 16> &Component) { - StringRef Separator = sys::path::get_separator(); - if (!BaseIsResolved) { - if (Path[0] == '~' && - (Path.size() == 1 || sys::path::is_separator(Path[1]))) { - static SmallString<128> HomeP; - if (HomeP.str().empty()) - sys::path::home_directory(HomeP); - StringRef(HomeP).split(Component, Separator, /*MaxSplit*/ -1, - /*KeepEmpty*/ false); - } else if (BasePath.empty()) { - static SmallString<256> CurrentPath; - if (CurrentPath.str().empty()) - sys::fs::current_path(CurrentPath); - StringRef(CurrentPath) - .split(Component, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false); - } else { - BasePath.split(Component, Separator, /*MaxSplit*/ -1, - /*KeepEmpty*/ false); - } - } - - Path.split(Component, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false); -} - -void normalizePathSegments(SmallVector<StringRef, 16> &PathParts) { - SmallVector<StringRef, 16> NormalizedPath; - for (auto &Part : PathParts) { - if (Part == ".") { - continue; - } else if (Part == "..") { - if (!NormalizedPath.empty() && NormalizedPath.back() != "..") { - NormalizedPath.pop_back(); - } else { - NormalizedPath.push_back(".."); - } - } else { - NormalizedPath.push_back(Part); - } - } - PathParts.swap(NormalizedPath); -} -#endif - -std::optional<std::string> PathResolver::realpathCached(StringRef Path, - std::error_code &EC, - StringRef Base, - bool BaseIsResolved, - long SymLoopLevel) { - EC.clear(); - - if (Path.empty()) { - EC = std::make_error_code(std::errc::no_such_file_or_directory); - LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Empty path\n";); - - return std::nullopt; - } - - if (SymLoopLevel <= 0) { - EC = std::make_error_code(std::errc::too_many_symbolic_link_levels); - LLVM_DEBUG( - dbgs() << "PathResolver::realpathCached: Too many Symlink levels: " - << Path << "\n";); - - return std::nullopt; - } - - // If already cached - retun cached result - bool isRelative = sys::path::is_relative(Path); - if (!isRelative) { - if (auto Cached = LibPathCache->read_realpath(Path)) { - EC = Cached->ErrnoCode; - if (EC) { - LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Cached (error) for " - << Path << "\n";); - } else { - LLVM_DEBUG( - dbgs() << "PathResolver::realpathCached: Cached (success) for " - << Path << " => " << Cached->canonicalPath << "\n";); - } - return Cached->canonicalPath.empty() - ? 
std::nullopt - : std::make_optional(Cached->canonicalPath); - } - } - - LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Resolving path: " << Path - << "\n";); - - // If result not in cache - call system function and cache result - - StringRef Separator(sys::path::get_separator()); - SmallString<256> Resolved(Separator); -#ifndef _WIN32 - SmallVector<StringRef, 16> Components; - - if (isRelative) { - if (BaseIsResolved) { - Resolved.assign(Base); - LLVM_DEBUG(dbgs() << " Using Resolved base: " << Base << "\n";); - } - createComponent(Path, Base, BaseIsResolved, Components); - } else { - Path.split(Components, Separator, /*MaxSplit*/ -1, /*KeepEmpty*/ false); - } - - normalizePathSegments(Components); - LLVM_DEBUG({ - for (auto &C : Components) - dbgs() << " " << C << " "; - - dbgs() << "\n"; - }); - - // Handle path list items - for (const auto &Component : Components) { - if (Component == ".") - continue; - if (Component == "..") { - // collapse "a/b/../c" to "a/c" - size_t S = Resolved.rfind(Separator); - if (S != llvm::StringRef::npos) - Resolved.resize(S); - if (Resolved.empty()) - Resolved = Separator; - continue; - } - - size_t oldSize = Resolved.size(); - sys::path::append(Resolved, Component); - const char *ResolvedPath = Resolved.c_str(); - LLVM_DEBUG(dbgs() << " Processing Component: " << Component << " => " - << ResolvedPath << "\n";); - mode_t st_mode = lstatCached(ResolvedPath); - - if (S_ISLNK(st_mode)) { - LLVM_DEBUG(dbgs() << " Found symlink: " << ResolvedPath << "\n";); - - auto SymlinkOpt = readlinkCached(ResolvedPath); - if (!SymlinkOpt) { - EC = std::make_error_code(std::errc::no_such_file_or_directory); - LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC}); - LLVM_DEBUG(dbgs() << " Failed to read symlink: " << ResolvedPath - << "\n";); - - return std::nullopt; - } - - StringRef Symlink = *SymlinkOpt; - LLVM_DEBUG(dbgs() << " Symlink points to: " << Symlink << "\n";); - - std::string resolvedBase = ""; - if (sys::path::is_relative(Symlink)) { - Resolved.resize(oldSize); - resolvedBase = Resolved.str().str(); - } - - auto RealSymlink = - realpathCached(Symlink, EC, resolvedBase, - /*BaseIsResolved=*/true, SymLoopLevel - 1); - if (!RealSymlink) { - LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC}); - LLVM_DEBUG(dbgs() << " Failed to resolve symlink target: " << Symlink - << "\n";); - - return std::nullopt; - } - - Resolved.assign(*RealSymlink); - LLVM_DEBUG(dbgs() << " Symlink Resolved to: " << Resolved << "\n";); - - } else if (st_mode == 0) { - EC = std::make_error_code(std::errc::no_such_file_or_directory); - LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{"", EC}); - LLVM_DEBUG(dbgs() << " Component does not exist: " << ResolvedPath - << "\n";); - - return std::nullopt; - } - } -#else - sys::fs::real_path(Path, Resolved); // Windows fallback -#endif - - std::string Canonical = Resolved.str().str(); - { - LibPathCache->insert_realpath(Path, LibraryPathCache::PathInfo{ - Canonical, - std::error_code() // success - }); - } - LLVM_DEBUG(dbgs() << "PathResolver::realpathCached: Final Resolved: " << Path - << " => " << Canonical << "\n";); - return Canonical; -} - -void LibraryScanHelper::addBasePath(const std::string &Path, PathType K) { - std::error_code EC; - std::string Canon = resolveCanonical(Path, EC); - if (EC) { - LLVM_DEBUG( - dbgs() - << "LibraryScanHelper::addBasePath: Failed to canonicalize path: " - << Path << "\n";); - return; - } - std::unique_lock<std::shared_mutex> Lock(Mtx); - if 
(LibSearchPaths.count(Canon)) { - LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Already added: " - << Canon << "\n";); - return; - } - K = K == PathType::Unknown ? classifyKind(Canon) : K; - auto SP = std::make_shared<LibrarySearchPath>(Canon, K); - LibSearchPaths[Canon] = SP; - - if (K == PathType::User) { - LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Added User path: " - << Canon << "\n";); - UnscannedUsr.push_back(StringRef(SP->BasePath)); - } else { - LLVM_DEBUG(dbgs() << "LibraryScanHelper::addBasePath: Added System path: " - << Canon << "\n";); - UnscannedSys.push_back(StringRef(SP->BasePath)); - } -} - -std::vector<std::shared_ptr<LibrarySearchPath>> -LibraryScanHelper::getNextBatch(PathType K, size_t BatchSize) { - std::vector<std::shared_ptr<LibrarySearchPath>> Result; - auto &Queue = (K == PathType::User) ? UnscannedUsr : UnscannedSys; - - std::unique_lock<std::shared_mutex> Lock(Mtx); - - while (!Queue.empty() && (BatchSize == 0 || Result.size() < BatchSize)) { - StringRef Base = Queue.front(); - auto It = LibSearchPaths.find(Base); - if (It != LibSearchPaths.end()) { - auto &SP = It->second; - ScanState Expected = ScanState::NotScanned; - if (SP->State.compare_exchange_strong(Expected, ScanState::Scanning)) { - Result.push_back(SP); - } - } - Queue.pop_front(); - } - - return Result; -} - -bool LibraryScanHelper::isTrackedBasePath(StringRef Path) const { - std::error_code EC; - std::string Canon = resolveCanonical(Path, EC); - if (EC) - return false; - - std::shared_lock<std::shared_mutex> Lock(Mtx); - return LibSearchPaths.count(Canon) > 0; -} - -bool LibraryScanHelper::leftToScan(PathType K) const { - std::shared_lock<std::shared_mutex> Lock(Mtx); - for (const auto &KV : LibSearchPaths) { - const auto &SP = KV.second; - if (SP->Kind == K && SP->State == ScanState::NotScanned) - return true; - } - return false; -} - -void LibraryScanHelper::resetToScan() { - std::shared_lock<std::shared_mutex> Lock(Mtx); - - for (auto &[_, SP] : LibSearchPaths) { - ScanState Expected = ScanState::Scanned; - - if (!SP->State.compare_exchange_strong(Expected, ScanState::NotScanned)) - continue; - - auto &TargetList = - (SP->Kind == PathType::User) ? UnscannedUsr : UnscannedSys; - TargetList.emplace_back(SP->BasePath); - } -} - -std::vector<std::shared_ptr<LibrarySearchPath>> -LibraryScanHelper::getAllUnits() const { - std::shared_lock<std::shared_mutex> Lock(Mtx); - std::vector<std::shared_ptr<LibrarySearchPath>> Result; - Result.reserve(LibSearchPaths.size()); - for (const auto &[_, SP] : LibSearchPaths) { - Result.push_back(SP); - } - return Result; -} - -std::string LibraryScanHelper::resolveCanonical(StringRef Path, - std::error_code &EC) const { - auto Canon = LibPathResolver->resolve(Path, EC); - return EC ? 
Path.str() : *Canon; -} - -PathType LibraryScanHelper::classifyKind(StringRef Path) const { - // Detect home directory - const char *Home = getenv("HOME"); - if (Home && Path.find(Home) == 0) - return PathType::User; - - static const std::array<std::string, 5> UserPrefixes = { - "/usr/local", // often used by users for manual installs - "/opt/homebrew", // common on macOS - "/opt/local", // MacPorts - "/home", // Linux home dirs - "/Users", // macOS user dirs - }; - - for (const auto &Prefix : UserPrefixes) { - if (Path.find(Prefix) == 0) - return PathType::User; - } - - return PathType::System; -} - -Expected<LibraryDepsInfo> parseMachODeps(const object::MachOObjectFile &Obj) { - LibraryDepsInfo Libdeps; - LLVM_DEBUG(dbgs() << "Parsing Mach-O dependencies...\n";); - for (const auto &Command : Obj.load_commands()) { - switch (Command.C.cmd) { - case MachO::LC_LOAD_DYLIB: { - MachO::dylib_command dylibCmd = Obj.getDylibIDLoadCommand(Command); - const char *name = Command.Ptr + dylibCmd.dylib.name; - Libdeps.addDep(name); - LLVM_DEBUG(dbgs() << " Found LC_LOAD_DYLIB: " << name << "\n";); - } break; - case MachO::LC_LOAD_WEAK_DYLIB: - case MachO::LC_REEXPORT_DYLIB: - case MachO::LC_LOAD_UPWARD_DYLIB: - case MachO::LC_LAZY_LOAD_DYLIB: - break; - case MachO::LC_RPATH: { - // Extract RPATH - MachO::rpath_command rpathCmd = Obj.getRpathCommand(Command); - const char *rpath = Command.Ptr + rpathCmd.path; - LLVM_DEBUG(dbgs() << " Found LC_RPATH: " << rpath << "\n";); - - SmallVector<StringRef, 4> RawPaths; - SplitString(StringRef(rpath), RawPaths, - sys::EnvPathSeparator == ':' ? ":" : ";"); - - for (const auto &raw : RawPaths) { - Libdeps.addRPath(raw.str()); // Convert to std::string - LLVM_DEBUG(dbgs() << " Parsed RPATH entry: " << raw << "\n";); - } - break; - } - } - } - - return Expected<LibraryDepsInfo>(std::move(Libdeps)); -} - -template <class ELFT> -static Expected<StringRef> getDynamicStrTab(const object::ELFFile<ELFT> &Elf) { - auto DynamicEntriesOrError = Elf.dynamicEntries(); - if (!DynamicEntriesOrError) - return DynamicEntriesOrError.takeError(); - - for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) { - if (Dyn.d_tag == ELF::DT_STRTAB) { - auto MappedAddrOrError = Elf.toMappedAddr(Dyn.getPtr()); - if (!MappedAddrOrError) - return MappedAddrOrError.takeError(); - return StringRef(reinterpret_cast<const char *>(*MappedAddrOrError)); - } - } - - // If the dynamic segment is not present, we fall back on the sections. - auto SectionsOrError = Elf.sections(); - if (!SectionsOrError) - return SectionsOrError.takeError(); - - for (const typename ELFT::Shdr &Sec : *SectionsOrError) { - if (Sec.sh_type == ELF::SHT_DYNSYM) - return Elf.getStringTableForSymtab(Sec); - } - - return make_error<StringError>("dynamic string table not found", - inconvertibleErrorCode()); -} - -template <typename ELFT> -Expected<LibraryDepsInfo> parseELF(const object::ELFFile<ELFT> &Elf) { - LibraryDepsInfo Deps; - Expected<StringRef> StrTabOrErr = getDynamicStrTab(Elf); - if (!StrTabOrErr) - return StrTabOrErr.takeError(); - - const char *Data = StrTabOrErr->data(); - - auto DynamicEntriesOrError = Elf.dynamicEntries(); - if (!DynamicEntriesOrError) { - return DynamicEntriesOrError.takeError(); - } - - for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) { - switch (Dyn.d_tag) { - case ELF::DT_NEEDED: - Deps.addDep(Data + Dyn.d_un.d_val); - break; - case ELF::DT_RPATH: { - SmallVector<StringRef, 4> RawPaths; - SplitString(Data + Dyn.d_un.d_val, RawPaths, - sys::EnvPathSeparator == ':' ? 
":" : ";"); - for (const auto &raw : RawPaths) - Deps.addRPath(raw.str()); - break; - } - case ELF::DT_RUNPATH: { - SmallVector<StringRef, 4> RawPaths; - SplitString(Data + Dyn.d_un.d_val, RawPaths, - sys::EnvPathSeparator == ':' ? ":" : ";"); - for (const auto &raw : RawPaths) - Deps.addRunPath(raw.str()); - break; - } - case ELF::DT_FLAGS_1: - // Check if this is not a pie executable. - if (Dyn.d_un.d_val & ELF::DF_1_PIE) - Deps.isPIE = true; - break; - // (Dyn.d_tag == ELF::DT_NULL) continue; - // (Dyn.d_tag == ELF::DT_AUXILIARY || Dyn.d_tag == ELF::DT_FILTER) - default: - break; - } - } - - return Expected<LibraryDepsInfo>(std::move(Deps)); -} - -Expected<LibraryDepsInfo> parseELFDeps(const object::ELFObjectFileBase &Obj) { - using namespace object; - LLVM_DEBUG(dbgs() << "parseELFDeps: Detected ELF object\n";); - if (const auto *ELF = dyn_cast<ELF32LEObjectFile>(&Obj)) - return parseELF(ELF->getELFFile()); - else if (const auto *ELF = dyn_cast<ELF32BEObjectFile>(&Obj)) - return parseELF(ELF->getELFFile()); - else if (const auto *ELF = dyn_cast<ELF64LEObjectFile>(&Obj)) - return parseELF(ELF->getELFFile()); - else if (const auto *ELF = dyn_cast<ELF64BEObjectFile>(&Obj)) - return parseELF(ELF->getELFFile()); - - LLVM_DEBUG(dbgs() << "parseELFDeps: Unknown ELF format\n";); - return createStringError(std::errc::not_supported, "Unknown ELF format"); -} - -Expected<LibraryDepsInfo> LibraryScanner::extractDeps(StringRef FilePath) { - LLVM_DEBUG(dbgs() << "extractDeps: Attempting to open file " << FilePath - << "\n";); - - ObjectFileLoader ObjLoader(FilePath); - auto ObjOrErr = ObjLoader.getObjectFile(); - if (!ObjOrErr) { - LLVM_DEBUG(dbgs() << "extractDeps: Failed to open " << FilePath << "\n";); - return ObjOrErr.takeError(); - } - - object::ObjectFile *Obj = &ObjOrErr.get(); - - if (auto *elfObj = dyn_cast<object::ELFObjectFileBase>(Obj)) { - LLVM_DEBUG(dbgs() << "extractDeps: File " << FilePath - << " is an ELF object\n";); - - return parseELFDeps(*elfObj); - } - - if (auto *macho = dyn_cast<object::MachOObjectFile>(Obj)) { - LLVM_DEBUG(dbgs() << "extractDeps: File " << FilePath - << " is a Mach-O object\n";); - return parseMachODeps(*macho); - } - - if (Obj->isCOFF()) { - // TODO: COFF support - return LibraryDepsInfo(); - } - - LLVM_DEBUG(dbgs() << "extractDeps: Unsupported binary format for file " - << FilePath << "\n";); - return createStringError(inconvertibleErrorCode(), - "Unsupported binary format: %s", - FilePath.str().c_str()); -} - -std::optional<std::string> LibraryScanner::shouldScan(StringRef FilePath) { - std::error_code EC; - - LLVM_DEBUG(dbgs() << "[shouldScan] Checking: " << FilePath << "\n";); - - // [1] Check file existence early - if (!sys::fs::exists(FilePath)) { - LLVM_DEBUG(dbgs() << " -> Skipped: file does not exist.\n";); - - return std::nullopt; - } - - // [2] Resolve to canonical path - auto CanonicalPathOpt = ScanHelper.resolve(FilePath, EC); - if (EC || !CanonicalPathOpt) { - LLVM_DEBUG(dbgs() << " -> Skipped: failed to resolve path (EC=" - << EC.message() << ").\n";); - - return std::nullopt; - } - - const std::string &CanonicalPath = *CanonicalPathOpt; - LLVM_DEBUG(dbgs() << " -> Canonical path: " << CanonicalPath << "\n"); - - // [3] Check if it's a directory — skip directories - if (sys::fs::is_directory(CanonicalPath)) { - LLVM_DEBUG(dbgs() << " -> Skipped: path is a directory.\n";); - - return std::nullopt; - } - - // [4] Skip if it's not a shared library. 
- if (!DylibPathValidator::isSharedLibrary(CanonicalPath)) { - LLVM_DEBUG(dbgs() << " -> Skipped: not a shared library.\n";); - return std::nullopt; - } - - // [5] Skip if we've already seen this path (via cache) - if (ScanHelper.hasSeenOrMark(CanonicalPath)) { - LLVM_DEBUG(dbgs() << " -> Skipped: already seen.\n";); - - return std::nullopt; - } - - // [6] Already tracked in LibraryManager? - if (LibMgr.hasLibrary(CanonicalPath)) { - LLVM_DEBUG(dbgs() << " -> Skipped: already tracked by LibraryManager.\n";); - - return std::nullopt; - } - - // [7] Run user-defined hook (default: always true) - if (!ShouldScanCall(CanonicalPath)) { - LLVM_DEBUG(dbgs() << " -> Skipped: user-defined hook rejected.\n";); - - return std::nullopt; - } - - LLVM_DEBUG(dbgs() << " -> Accepted: ready to scan " << CanonicalPath - << "\n";); - return CanonicalPath; -} - -void LibraryScanner::handleLibrary(StringRef FilePath, PathType K, int level) { - LLVM_DEBUG(dbgs() << "LibraryScanner::handleLibrary: Scanning: " << FilePath - << ", level=" << level << "\n";); - auto CanonPathOpt = shouldScan(FilePath); - if (!CanonPathOpt) { - LLVM_DEBUG(dbgs() << " Skipped (shouldScan returned false): " << FilePath - << "\n";); - - return; - } - const std::string CanonicalPath = *CanonPathOpt; - - auto DepsOrErr = extractDeps(CanonicalPath); - if (!DepsOrErr) { - LLVM_DEBUG(dbgs() << " Failed to extract deps for: " << CanonicalPath - << "\n";); - handleError(DepsOrErr.takeError()); - return; - } - - LibraryDepsInfo &Deps = *DepsOrErr; - - LLVM_DEBUG({ - dbgs() << " Found deps : \n"; - for (const auto &dep : Deps.deps) - dbgs() << " : " << dep << "\n"; - dbgs() << " Found @rpath : " << Deps.rpath.size() << "\n"; - for (const auto &r : Deps.rpath) - dbgs() << " : " << r << "\n"; - dbgs() << " Found @runpath : \n"; - for (const auto &r : Deps.runPath) - dbgs() << " : " << r << "\n"; - }); - - if (Deps.isPIE && level == 0) { - LLVM_DEBUG(dbgs() << " Skipped PIE executable at top level: " - << CanonicalPath << "\n";); - - return; - } - - bool Added = LibMgr.addLibrary(CanonicalPath, K); - if (!Added) { - LLVM_DEBUG(dbgs() << " Already added: " << CanonicalPath << "\n";); - return; - } - - // Heuristic 1: No RPATH/RUNPATH, skip deps - if (Deps.rpath.empty() && Deps.runPath.empty()) { - LLVM_DEBUG( - dbgs() << "LibraryScanner::handleLibrary: Skipping deps (Heuristic1): " - << CanonicalPath << "\n";); - return; - } - - // Heuristic 2: All RPATH and RUNPATH already tracked - auto allTracked = [&](const auto &Paths) { - LLVM_DEBUG(dbgs() << " Checking : " << Paths.size() << "\n";); - return std::all_of(Paths.begin(), Paths.end(), [&](StringRef P) { - LLVM_DEBUG(dbgs() << " Checking isTrackedBasePath : " << P << "\n";); - return ScanHelper.isTrackedBasePath( - DylibResolver::resolvelinkerFlag(P, CanonicalPath)); - }); - }; - - if (allTracked(Deps.rpath) && allTracked(Deps.runPath)) { - LLVM_DEBUG( - dbgs() << "LibraryScanner::handleLibrary: Skipping deps (Heuristic2): " - << CanonicalPath << "\n";); - return; - } - - DylibPathValidator Validator(ScanHelper.getPathResolver()); - DylibResolver Resolver(Validator); - Resolver.configure(CanonicalPath, - {{Deps.rpath, SearchPathType::RPath}, - {ScanHelper.getSearchPaths(), SearchPathType::UsrOrSys}, - {Deps.runPath, SearchPathType::RunPath}}); - for (StringRef Dep : Deps.deps) { - LLVM_DEBUG(dbgs() << " Resolving dep: " << Dep << "\n";); - auto DepFullOpt = Resolver.resolve(Dep); - if (!DepFullOpt) { - LLVM_DEBUG(dbgs() << " Failed to resolve dep: " << Dep << "\n";); - - continue; - } - 
LLVM_DEBUG(dbgs() << " Resolved dep to: " << *DepFullOpt << "\n";); - - handleLibrary(*DepFullOpt, K, level + 1); - } -} - -void LibraryScanner::scanBaseDir(std::shared_ptr<LibrarySearchPath> SP) { - if (!sys::fs::is_directory(SP->BasePath) || SP->BasePath.empty()) { - LLVM_DEBUG( - dbgs() << "LibraryScanner::scanBaseDir: Invalid or empty basePath: " - << SP->BasePath << "\n";); - return; - } - - LLVM_DEBUG(dbgs() << "LibraryScanner::scanBaseDir: Scanning directory: " - << SP->BasePath << "\n";); - std::error_code EC; - - SP->State.store(ScanState::Scanning); - - for (sys::fs::directory_iterator It(SP->BasePath, EC), end; It != end && !EC; - It.increment(EC)) { - auto Entry = *It; - if (!Entry.status()) - continue; - - auto Status = *Entry.status(); - if (sys::fs::is_regular_file(Status) || sys::fs::is_symlink_file(Status)) { - LLVM_DEBUG(dbgs() << " Found file: " << Entry.path() << "\n";); - // async support ? - handleLibrary(Entry.path(), SP->Kind); - } - } - - SP->State.store(ScanState::Scanned); -} - -void LibraryScanner::scanNext(PathType K, size_t BatchSize) { - LLVM_DEBUG(dbgs() << "LibraryScanner::scanNext: Scanning next batch of size " - << BatchSize << " for kind " - << (K == PathType::User ? "User" : "System") << "\n";); - - auto SearchPaths = ScanHelper.getNextBatch(K, BatchSize); - for (auto &SP : SearchPaths) { - LLVM_DEBUG(dbgs() << " Scanning unit with basePath: " << SP->BasePath - << "\n";); - - scanBaseDir(SP); - } -} - -} // end namespace llvm::orc diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 488b078..1096e57 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -4082,10 +4082,10 @@ void AssemblyWriter::printTypeIdentities() { /// printFunction - Print all aspects of a function. void AssemblyWriter::printFunction(const Function *F) { - if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); - if (F->isMaterializable()) Out << "; Materializable\n"; + else if (AnnotationWriter) + AnnotationWriter->emitFunctionAnnot(F, Out); const AttributeList &Attrs = F->getAttributes(); if (Attrs.hasFnAttrs()) { diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt index 1e1d0a6..70c4577 100644 --- a/llvm/lib/MC/CMakeLists.txt +++ b/llvm/lib/MC/CMakeLists.txt @@ -73,9 +73,10 @@ add_llvm_component_library(LLVMMC ${LLVM_MAIN_INCLUDE_DIR}/llvm/MC LINK_COMPONENTS + BinaryFormat + DebugInfoDWARFLowLevel Support TargetParser - BinaryFormat DEPENDS intrinsics_gen diff --git a/llvm/lib/MC/MCSFrame.cpp b/llvm/lib/MC/MCSFrame.cpp index d6fa54c..e0a90df 100644 --- a/llvm/lib/MC/MCSFrame.cpp +++ b/llvm/lib/MC/MCSFrame.cpp @@ -8,6 +8,8 @@ #include "llvm/MC/MCSFrame.h" #include "llvm/BinaryFormat/SFrame.h" +#include "llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h" +#include "llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -211,8 +213,152 @@ class SFrameEmitterImpl { return true; } + // Technically, the escape data could be anything, but it is commonly a dwarf + // CFI program. Even then, it could contain an arbitrarily complicated Dwarf + // expression. Following gnu-gas, look for certain common cases that could + // invalidate an FDE, emit a warning for those sequences, and don't generate + // an FDE in those cases. Allow any that are known safe. It is likely that + // more thorough test cases could refine this code, but it handles the most + // important ones compatibly with gas. 
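+  // (Editor's note: illustrative sketch, not part of the patch. The escape
+  // payload is just raw bytes; the check below parses it with the low-level
+  // DWARF CFI parser roughly like so, where `Bytes` is a hypothetical
+  // StringRef holding the .cfi_escape payload:
+  //   DWARFDataExtractorSimple Data(Bytes, /*IsLittleEndian=*/true,
+  //                                 /*AddrSize=*/8);
+  //   dwarf::CFIProgram Prog(/*CodeAlignmentFactor=*/1,
+  //                          /*DataAlignmentFactor=*/1, Triple::x86_64);
+  //   uint64_t Off = 0;
+  //   if (Prog.parse(Data, &Off, Bytes.size()))
+  //     ; // unparsable: treat as having unknown effects
+  // A parse failure, or any opcode that touches the CFA, FP, or RA,
+  // disqualifies the FDE.)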
+ // Returns true if the CFI escape sequence is safe for sframes. + bool isCFIEscapeSafe(SFrameFDE &FDE, const SFrameFRE &FRE, + const MCCFIInstruction &CFI) { + const MCAsmInfo *AI = Streamer.getContext().getAsmInfo(); + DWARFDataExtractorSimple data(CFI.getValues(), AI->isLittleEndian(), + AI->getCodePointerSize()); + + // Normally, both alignment factors are extracted from the enclosing Dwarf + // FDE or CIE. We don't have one here. Alignments are used for scaling + // factors for ops like CFA_def_cfa_offset_sf. But this particular function + // is only interested in registers. + dwarf::CFIProgram P(/*CodeAlignmentFactor=*/1, + /*DataAlignmentFactor=*/1, + Streamer.getContext().getTargetTriple().getArch()); + uint64_t Offset = 0; + if (P.parse(data, &Offset, CFI.getValues().size())) { + // Not a parsable dwarf expression. Assume the worst. + Streamer.getContext().reportWarning( + CFI.getLoc(), + "skipping SFrame FDE; .cfi_escape with unknown effects"); + return false; + } + + // This loop deals with dwarf::CFIProgram::Instructions. Everywhere else + // this file deals with MCCFIInstructions. + for (const dwarf::CFIProgram::Instruction &I : P) { + switch (I.Opcode) { + case dwarf::DW_CFA_nop: + break; + case dwarf::DW_CFA_val_offset: { + // First argument is a register. Anything that touches CFA, FP, or RA is + // a problem, but allow others through. As an even more special case, + // allow SP + 0. + auto Reg = I.getOperandAsUnsigned(P, 0); + // The parser should have failed in this case. + assert(Reg && "DW_CFA_val_offset with no register."); + bool SPOk = true; + if (*Reg == SPReg) { + auto Opnd = I.getOperandAsSigned(P, 1); + if (!Opnd || *Opnd != 0) + SPOk = false; + } + if (!SPOk || *Reg == RAReg || *Reg == FPReg) { + StringRef RN = *Reg == SPReg + ? "SP reg " + : (*Reg == FPReg ? "FP reg " : "RA reg "); + Streamer.getContext().reportWarning( + CFI.getLoc(), + Twine( + "skipping SFrame FDE; .cfi_escape DW_CFA_val_offset with ") + + RN + Twine(*Reg)); + return false; + } + } break; + case dwarf::DW_CFA_expression: { + // First argument is a register. Anything that touches CFA, FP, or RA is + // a problem, but allow others through. + auto Reg = I.getOperandAsUnsigned(P, 0); + if (!Reg) { + Streamer.getContext().reportWarning( + CFI.getLoc(), + "skipping SFrame FDE; .cfi_escape with unknown effects"); + return false; + } + if (*Reg == SPReg || *Reg == RAReg || *Reg == FPReg) { + StringRef RN = *Reg == SPReg + ? "SP reg " + : (*Reg == FPReg ? "FP reg " : "RA reg "); + Streamer.getContext().reportWarning( + CFI.getLoc(), + Twine( + "skipping SFrame FDE; .cfi_escape DW_CFA_expression with ") + + RN + Twine(*Reg)); + return false; + } + } break; + case dwarf::DW_CFA_GNU_args_size: { + auto Size = I.getOperandAsSigned(P, 0); + // Zero size doesn't affect the cfa. + if (Size && *Size == 0) + break; + if (FRE.Info.getBaseRegister() != BaseReg::FP) { + Streamer.getContext().reportWarning( + CFI.getLoc(), + Twine("skipping SFrame FDE; .cfi_escape DW_CFA_GNU_args_size " + "with non frame-pointer CFA")); + return false; + } + } break; + // Cases that gas doesn't specially handle. TODO: Some of these could be + // analyzed and handled instead of just punting. But these are uncommon, + // or should be written as normal cfi directives. Some will need fixes to + // the scaling factor. 
+ case dwarf::DW_CFA_advance_loc: + case dwarf::DW_CFA_offset: + case dwarf::DW_CFA_restore: + case dwarf::DW_CFA_set_loc: + case dwarf::DW_CFA_advance_loc1: + case dwarf::DW_CFA_advance_loc2: + case dwarf::DW_CFA_advance_loc4: + case dwarf::DW_CFA_offset_extended: + case dwarf::DW_CFA_restore_extended: + case dwarf::DW_CFA_undefined: + case dwarf::DW_CFA_same_value: + case dwarf::DW_CFA_register: + case dwarf::DW_CFA_remember_state: + case dwarf::DW_CFA_restore_state: + case dwarf::DW_CFA_def_cfa: + case dwarf::DW_CFA_def_cfa_register: + case dwarf::DW_CFA_def_cfa_offset: + case dwarf::DW_CFA_def_cfa_expression: + case dwarf::DW_CFA_offset_extended_sf: + case dwarf::DW_CFA_def_cfa_sf: + case dwarf::DW_CFA_def_cfa_offset_sf: + case dwarf::DW_CFA_val_offset_sf: + case dwarf::DW_CFA_val_expression: + case dwarf::DW_CFA_MIPS_advance_loc8: + case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc: + case dwarf::DW_CFA_AARCH64_negate_ra_state: + case dwarf::DW_CFA_LLVM_def_aspace_cfa: + case dwarf::DW_CFA_LLVM_def_aspace_cfa_sf: + Streamer.getContext().reportWarning( + CFI.getLoc(), "skipping SFrame FDE; .cfi_escape " + "CFA expression with unknown side effects"); + return false; + default: + // Dwarf expression was only partially valid, and user could have + // written anything. + Streamer.getContext().reportWarning( + CFI.getLoc(), + "skipping SFrame FDE; .cfi_escape with unknown effects"); + return false; + } + } + return true; + } + // Add the effects of CFI to the current FDE, creating a new FRE when - // necessary. + // necessary. Return true if the CFI is representable in the sframe format. bool handleCFI(SFrameFDE &FDE, SFrameFRE &FRE, const MCCFIInstruction &CFI) { switch (CFI.getOperation()) { case MCCFIInstruction::OpDefCfaRegister: @@ -265,10 +411,11 @@ class SFrameEmitterImpl { FRE = FDE.SaveState.pop_back_val(); return true; case MCCFIInstruction::OpEscape: - // TODO: Implement. Will use FDE. - return true; + // This is a string of bytes that contains an arbitrary dwarf-expression + // that may or may not affect unwind info. + return isCFIEscapeSafe(FDE, FRE, CFI); default: - // Instructions that don't affect the CFA, RA, and SP can be safely + // Instructions that don't affect the CFA, RA, and FP can be safely // ignored. return true; } diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index f74e52a..c27f627 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -89,14 +89,32 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { return A.Name.size() < B.Name.size(); }); } + + for (const auto &G : reverse(Globs)) { + StringRef Prefix = G.Pattern.prefix(); + + auto &V = PrefixToGlob.emplace(Prefix).first->second; + V.emplace_back(&G); + } } void SpecialCaseList::GlobMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { - for (const auto &G : reverse(Globs)) - if (G.Pattern.match(Query)) - return Cb(G.Name, G.LineNo); + if (!PrefixToGlob.empty()) { + for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) { + for (const auto *G : V) { + if (G->Pattern.match(Query)) { + Cb(G->Name, G->LineNo); + // As soon as we find a match in the vector, we can break for this + // vector, since the globs are already sorted by priority within the + // prefix group. However, we continue searching other prefix groups in + // the map, as they may contain a better match overall. 
+ break; + } + } + } + } } SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp index 67483ba..9d45096 100644 --- a/llvm/lib/Support/Timer.cpp +++ b/llvm/lib/Support/Timer.cpp @@ -240,7 +240,8 @@ private: getGroupEntry(StringRef GroupName, StringRef GroupDescription) { std::pair<TimerGroup *, Name2TimerMap> &GroupEntry = Map[GroupName]; if (!GroupEntry.first) - GroupEntry.first = new TimerGroup(GroupName, GroupDescription); + GroupEntry.first = + new TimerGroup(GroupName, GroupDescription, /*PrintOnExit=*/true); return GroupEntry; } @@ -270,9 +271,10 @@ TimerGroup &NamedRegionTimer::getNamedTimerGroup(StringRef GroupName, static TimerGroup *TimerGroupList = nullptr; TimerGroup::TimerGroup(StringRef Name, StringRef Description, - sys::SmartMutex<true> &lock) + sys::SmartMutex<true> &lock, bool PrintOnExit) : Name(Name.begin(), Name.end()), - Description(Description.begin(), Description.end()) { + Description(Description.begin(), Description.end()), + PrintOnExit(PrintOnExit) { // Add the group to TimerGroupList. sys::SmartScopedLock<true> L(lock); if (TimerGroupList) @@ -282,12 +284,12 @@ TimerGroup::TimerGroup(StringRef Name, StringRef Description, TimerGroupList = this; } -TimerGroup::TimerGroup(StringRef Name, StringRef Description) - : TimerGroup(Name, Description, timerLock()) {} +TimerGroup::TimerGroup(StringRef Name, StringRef Description, bool PrintOnExit) + : TimerGroup(Name, Description, timerLock(), PrintOnExit) {} TimerGroup::TimerGroup(StringRef Name, StringRef Description, - const StringMap<TimeRecord> &Records) - : TimerGroup(Name, Description) { + const StringMap<TimeRecord> &Records, bool PrintOnExit) + : TimerGroup(Name, Description, PrintOnExit) { TimersToPrint.reserve(Records.size()); for (const auto &P : Records) TimersToPrint.emplace_back(P.getValue(), std::string(P.getKey()), @@ -301,7 +303,7 @@ TimerGroup::~TimerGroup() { while (FirstTimer) removeTimer(*FirstTimer); - if (!TimersToPrint.empty()) { + if (!TimersToPrint.empty() && PrintOnExit) { std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile(); PrintQueuedTimers(*OutStream); } @@ -530,7 +532,7 @@ public: sys::SmartMutex<true> TimerLock; TimerGroup DefaultTimerGroup{"misc", "Miscellaneous Ungrouped Timers", - TimerLock}; + TimerLock, /*PrintOnExit=*/true}; SignpostEmitter Signposts; // Order of these members and initialization below is important. 
For example diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 018ef31..d16b116 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9002,12 +9002,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI, } static SMECallAttrs -getSMECallAttrs(const Function &Caller, const AArch64TargetLowering &TLI, +getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI, const TargetLowering::CallLoweringInfo &CLI) { if (CLI.CB) - return SMECallAttrs(*CLI.CB, &TLI); + return SMECallAttrs(*CLI.CB, &RTLCI); if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) - return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI)); + return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), RTLCI)); return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal)); } @@ -9029,7 +9029,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( // SME Streaming functions are not eligible for TCO as they may require // the streaming mode or ZA to be restored after returning from the call. - SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI); + SMECallAttrs CallAttrs = + getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI); if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingAllZAState() || CallAttrs.caller().hasStreamingBody()) @@ -9454,7 +9455,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } // Determine whether we need any streaming mode changes. - SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI); + SMECallAttrs CallAttrs = + getSMECallAttrs(MF.getFunction(), getRuntimeLibcallsInfo(), CLI); std::optional<unsigned> ZAMarkerNode; bool UseNewSMEABILowering = getTM().useNewSMEABILowering(); @@ -26723,11 +26725,34 @@ static SDValue performDUPCombine(SDNode *N, } if (N->getOpcode() == AArch64ISD::DUP) { + SDValue Op = N->getOperand(0); + + // Optimize DUP(extload/zextload i8/i16/i32) to avoid GPR->FPR transfer. + // For example: + // v4i32 = DUP (i32 (zextloadi8 addr)) + // => + // v4i32 = SCALAR_TO_VECTOR (i32 (zextloadi8 addr)) ; Matches to ldr b0 + // v4i32 = DUPLANE32 (v4i32), 0 + if (auto *LD = dyn_cast<LoadSDNode>(Op)) { + ISD::LoadExtType ExtType = LD->getExtensionType(); + EVT MemVT = LD->getMemoryVT(); + EVT ElemVT = VT.getVectorElementType(); + if ((ExtType == ISD::EXTLOAD || ExtType == ISD::ZEXTLOAD) && + (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) && + ElemVT != MemVT && LD->hasOneUse()) { + EVT Vec128VT = EVT::getVectorVT(*DCI.DAG.getContext(), ElemVT, + 128 / ElemVT.getSizeInBits()); + SDValue ScalarToVec = + DCI.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, Op); + return DCI.DAG.getNode(getDUPLANEOp(ElemVT), DL, VT, ScalarToVec, + DCI.DAG.getConstant(0, DL, MVT::i64)); + } + } + // If the instruction is known to produce a scalar in SIMD registers, we can // duplicate it across the vector lanes using DUPLANE instead of moving it // to a GPR first. For example, this allows us to handle: // v4i32 = DUP (i32 (FCMGT (f32, f32))) - SDValue Op = N->getOperand(0); // FIXME: Ideally, we should be able to handle all instructions that // produce a scalar value in FPRs. if (Op.getOpcode() == AArch64ISD::FCMEQ || @@ -29496,11 +29521,6 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { if (Subtarget->isTargetAndroid()) return UseTlsOffset(IRB, 0x48); - // Fuchsia is similar. 
- // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. - if (Subtarget->isTargetFuchsia()) - return UseTlsOffset(IRB, -0x8); - return TargetLowering::getSafeStackPointerLocation(IRB); } @@ -29818,7 +29838,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { // Checks to allow the use of SME instructions if (auto *Base = dyn_cast<CallBase>(&Inst)) { - auto CallAttrs = SMECallAttrs(*Base, this); + auto CallAttrs = SMECallAttrs(*Base, &getRuntimeLibcallsInfo()); if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingZT0() || CallAttrs.requiresPreservingAllZAState()) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d5117da..457e540 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5151,7 +5151,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // GPR32 zeroing if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) { - if (Subtarget.hasZeroCycleZeroingGPR32()) { + if (Subtarget.hasZeroCycleZeroingGPR64() && + !Subtarget.hasZeroCycleZeroingGPR32()) { + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + assert(DestRegX.isValid() && "Destination super-reg not valid"); + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (Subtarget.hasZeroCycleZeroingGPR32()) { BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a352096..b9e299e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4022,22 +4022,6 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; -// load zero-extended i32, bitcast to f64 -def : Pat<(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; -// load zero-extended i16, bitcast to f64 -def : Pat<(f64 (bitconvert (i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; -// load zero-extended i8, bitcast to f64 -def : Pat<(f64 (bitconvert (i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; -// load zero-extended i16, bitcast to f32 -def : Pat<(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; -// load zero-extended i8, bitcast to f32 -def : Pat<(f32 (bitconvert (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), - (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; - // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", [(AArch64Prefetch timm:$Rt, @@ -4389,6 +4373,64 @@ def : Pat <(v1i64 (scalar_to_vector (i64 (load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))), (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>; +// Patterns for bitconvert or scalar_to_vector of load operations. 
+// Enables direct SIMD register loads for small integer types (i8/i16) that are +// naturally zero-extended to i32/i64. +multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy, + SDPatternOperator OuterOp, + PatFrags LoadOp8, PatFrags LoadOp16> { + // 8-bit loads. + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (SUBREG_TO_REG (i64 0), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend), bsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend), bsub)>; + + // 16-bit loads. + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (SUBREG_TO_REG (i64 0), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend), hsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend), hsub)>; +} + +// Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit. +multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy, + SDPatternOperator OuterOp, + PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> { + defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>; + + // 32-bit loads. + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), + (SUBREG_TO_REG (i64 0), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend), ssub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend), ssub)>; +} + +// Instantiate bitconvert patterns for floating-point types. +defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>; +defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>; + +// Instantiate scalar_to_vector patterns for all vector types. 
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>; +defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>; +defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>; +defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>; +defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>; +defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>; +defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>; +defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>; + // Pre-fetch. defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", [(AArch64Prefetch timm:$Rt, diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index bdde8e3..2387f17 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -2762,11 +2762,11 @@ def : InstRW<[V2Write_11c_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>; def : InstRW<[V2Write_11c_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>; // Non temporal store, scalar + imm -def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; +def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BHWD]_ZRI$")>; // Non temporal store, scalar + scalar -def : InstRW<[V2Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>; -def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; +def : InstRW<[V2Write_2c_1L01_1S_1V01], (instrs STNT1H_ZRR)>; +def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BWD]_ZRR$")>; // Scatter non temporal store, vector + scalar 32-bit element size def : InstRW<[V2Write_4c_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index d50af11..fede586 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -224,7 +224,8 @@ static cl::opt<bool> EnableScalableAutovecInStreamingMode( static bool isSMEABIRoutineCall(const CallInst &CI, const AArch64TargetLowering &TLI) { const auto *F = CI.getCalledFunction(); - return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine(); + return F && + SMEAttrs(F->getName(), TLI.getRuntimeLibcallsInfo()).isSMEABIRoutine(); } /// Returns true if the function has explicit operations that can only be @@ -355,7 +356,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call, // change only once and avoid inlining of G into F. 
SMEAttrs FAttrs(*F); - SMECallAttrs CallAttrs(Call, getTLI()); + SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo()); if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) { if (F == Call.getCaller()) // (1) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index d71f728..085c8588 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -75,8 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { } void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName, - const AArch64TargetLowering &TLI) { - RTLIB::LibcallImpl Impl = TLI.getSupportedLibcallImpl(FuncName); + const RTLIB::RuntimeLibcallsInfo &RTLCI) { + RTLIB::LibcallImpl Impl = RTLCI.getSupportedLibcallImpl(FuncName); if (Impl == RTLIB::Unsupported) return; unsigned KnownAttrs = SMEAttrs::Normal; @@ -124,21 +124,22 @@ bool SMECallAttrs::requiresSMChange() const { return true; } -SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) +SMECallAttrs::SMECallAttrs(const CallBase &CB, + const RTLIB::RuntimeLibcallsInfo *RTLCI) : CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal), Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) { if (auto *CalledFunction = CB.getCalledFunction()) - CalledFn = SMEAttrs(*CalledFunction, TLI); - - // An `invoke` of an agnostic ZA function may not return normally (it may - // resume in an exception block). In this case, it acts like a private ZA - // callee and may require a ZA save to be set up before it is called. - if (isa<InvokeInst>(CB)) - CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + CalledFn = SMEAttrs(*CalledFunction, RTLCI); // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. assert((IsIndirect || ((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) && "SME attributes at callsite do not match declaration"); + + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. 
+ if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); } diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index d26e3cd..28c397e 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -12,8 +12,9 @@ #include "llvm/IR/Function.h" namespace llvm { - -class AArch64TargetLowering; +namespace RTLIB { +struct RuntimeLibcallsInfo; +} class Function; class CallBase; @@ -52,14 +53,14 @@ public: SMEAttrs() = default; SMEAttrs(unsigned Mask) { set(Mask); } - SMEAttrs(const Function &F, const AArch64TargetLowering *TLI = nullptr) + SMEAttrs(const Function &F, const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr) : SMEAttrs(F.getAttributes()) { - if (TLI) - addKnownFunctionAttrs(F.getName(), *TLI); + if (RTLCI) + addKnownFunctionAttrs(F.getName(), *RTLCI); } SMEAttrs(const AttributeList &L); - SMEAttrs(StringRef FuncName, const AArch64TargetLowering &TLI) { - addKnownFunctionAttrs(FuncName, TLI); + SMEAttrs(StringRef FuncName, const RTLIB::RuntimeLibcallsInfo &RTLCI) { + addKnownFunctionAttrs(FuncName, RTLCI); }; void set(unsigned M, bool Enable = true) { @@ -157,7 +158,7 @@ public: private: void addKnownFunctionAttrs(StringRef FuncName, - const AArch64TargetLowering &TLI); + const RTLIB::RuntimeLibcallsInfo &RTLCI); void validate() const; }; @@ -175,7 +176,7 @@ public: SMEAttrs Callsite = SMEAttrs::Normal) : CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {} - SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI); + SMECallAttrs(const CallBase &CB, const RTLIB::RuntimeLibcallsInfo *RTLCI); SMEAttrs &caller() { return CallerFn; } SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index e8b211f..7f00ead 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -176,6 +176,19 @@ def binop_s64_with_s32_mask_combines : GICombineGroup<[ combine_or_s64_with_s32_mask, combine_and_s64_with_s32_mask ]>; +// (or i64:x, (zext i32:y)) -> i64:(merge (or lo_32(x), i32:y), hi_32(x)) +// (or (zext i32:y), i64:x) -> i64:(merge (or lo_32(x), i32:y), hi_32(x)) +def or_s64_zext_s32_frag : GICombinePatFrag<(outs root:$dst), (ins $src_s64, $src_s32), + [(pattern (G_OR $dst, i64:$src_s64, i64:$zext_val), (G_ZEXT i64:$zext_val, i32:$src_s32)), + (pattern (G_OR $dst, i64:$zext_val, i64:$src_s64), (G_ZEXT i64:$zext_val, i32:$src_s32))]>; + +def combine_or_s64_s32 : GICombineRule< + (defs root:$dst), + (match (or_s64_zext_s32_frag $dst, i64:$x, i32:$y):$dst), + (apply (G_UNMERGE_VALUES $x_lo, $x_hi, $x), + (G_OR $or, $x_lo, $y), + (G_MERGE_VALUES $dst, $or, $x_hi))>; + let Predicates = [Has16BitInsts, NotHasMed3_16] in { // For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This // saves one instruction compared to the promotion. 
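// (Editor's note: illustrative example, not part of the patch. At the generic
// MIR level, combine_or_s64_s32 above rewrites
//   %z:_(s64) = G_ZEXT %y:_(s32)
//   %d:_(s64) = G_OR %x:_(s64), %z
// into
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64)
//   %or:_(s32) = G_OR %lo, %y
//   %d:_(s64) = G_MERGE_VALUES %or, %hi
// which is sound because the zero-extend guarantees the high half of %z is
// zero, so the 64-bit OR can only change the low 32 bits.)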
@@ -206,7 +219,7 @@ def AMDGPUPreLegalizerCombiner: GICombiner< "AMDGPUPreLegalizerCombinerImpl", [all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg, combine_shuffle_vector_to_build_vector, - binop_s64_with_s32_mask_combines]> { + binop_s64_with_s32_mask_combines, combine_or_s64_s32]> { let CombineAllMethodName = "tryCombineAllImpl"; } @@ -215,7 +228,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner< [all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp, uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg, rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64, - binop_s64_with_s32_mask_combines]> { + binop_s64_with_s32_mask_combines, combine_or_s64_s32]> { let CombineAllMethodName = "tryCombineAllImpl"; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 596a895..1a13b22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -976,9 +976,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, FPOpActions.clampMaxNumElementsStrict(0, S32, 2); } + auto &MinNumMaxNumIeee = + getActionDefinitionsBuilder({G_FMINNUM_IEEE, G_FMAXNUM_IEEE}); + + if (ST.hasVOP3PInsts()) { + MinNumMaxNumIeee.legalFor(FPTypesPK16) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .clampMaxNumElements(0, S16, 2) + .clampScalar(0, S16, S64) + .scalarize(0); + } else if (ST.has16BitInsts()) { + MinNumMaxNumIeee.legalFor(FPTypes16).clampScalar(0, S16, S64).scalarize(0); + } else { + MinNumMaxNumIeee.legalFor(FPTypesBase) + .clampScalar(0, S32, S64) + .scalarize(0); + } + auto &MinNumMaxNum = getActionDefinitionsBuilder( - {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE, - G_FMAXNUM_IEEE}); + {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM}); if (ST.hasVOP3PInsts()) { MinNumMaxNum.customFor(FPTypesPK16) @@ -2136,9 +2152,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor(FPTypesPK16) .clampMaxNumElements(0, S16, 2) .scalarize(0); + } else if (ST.hasVOP3PInsts()) { + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}) + .lowerFor({V2S16}) + .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) + .lower(); } else { - // TODO: Implement - getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}) + .scalarize(0) + .clampScalar(0, S32, S64) + .lower(); } getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET}) @@ -2195,8 +2219,6 @@ bool AMDGPULegalizerInfo::legalizeCustom( case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINIMUMNUM: case TargetOpcode::G_FMAXIMUMNUM: - case TargetOpcode::G_FMINNUM_IEEE: - case TargetOpcode::G_FMAXNUM_IEEE: return legalizeMinNumMaxNum(Helper, MI); case TargetOpcode::G_EXTRACT_VECTOR_ELT: return legalizeExtractVectorElt(MI, MRI, B); @@ -2817,23 +2839,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper, MachineFunction &MF = Helper.MIRBuilder.getMF(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE || - MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE; - - // With ieee_mode disabled, the instructions have the correct behavior - // already for G_FMINIMUMNUM/G_FMAXIMUMNUM. - // - // FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode - // enabled. 
- if (!MFI->getMode().IEEE) { - if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM || - MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM) - return true; - - return !IsIEEEOp; - } - - if (IsIEEEOp) + // With ieee_mode disabled, the instructions have the correct behavior. + if (!MFI->getMode().IEEE) return true; return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 53be167..10d4cd5 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -6546,23 +6546,25 @@ def KCFI_CHECK_ARM : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>, Sched<[]>, Requires<[IsARM]> { - let Size = 28; // 7 instructions (bic, ldr, 4x eor, beq, udf) + let Size = 40; // worst-case 10 instructions @ 4 bytes each + // (push, bic, ldr, 4x eor, pop, beq, udf) } def KCFI_CHECK_Thumb2 : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>, Sched<[]>, Requires<[IsThumb2]> { - let Size = - 32; // worst-case 9 instructions (push, bic, ldr, 4x eor, pop, beq.w, udf) + let Size = 34; // worst-case (push.w[2], bic[4], ldr[4], 4x eor[16], pop.w[2], + // beq.w[4], udf[2]) } def KCFI_CHECK_Thumb1 : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>, Sched<[]>, Requires<[IsThumb1Only]> { - let Size = 50; // worst-case 25 instructions (pushes, bic helper, type - // building, cmp, pops) + let Size = 38; // worst-case 19 instructions @ 2 bytes each + // (2x push, 3x bic-helper, subs+ldr, 13x type-building, cmp, + // 2x pop, beq, bkpt) } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index 42e90f0..d6fa65f 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// /// -/// \file This file contains pases and utilities to convert a modern LLVM +/// \file This file contains passes and utilities to convert a modern LLVM /// module into a module compatible with the LLVM 3.7-based DirectX Intermediate /// Language (DXIL). //===----------------------------------------------------------------------===// @@ -16,7 +16,6 @@ #include "DirectX.h" #include "DirectXIRPasses/PointerTypeAnalysis.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" #include "llvm/Analysis/DXILResource.h" @@ -27,7 +26,6 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/VersionTuple.h" #define DEBUG_TYPE "dxil-prepare" @@ -116,31 +114,6 @@ static void removeStringFunctionAttributes(Function &F, F.removeRetAttrs(DeadAttrs); } -static void cleanModuleFlags(Module &M) { - NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); - if (!MDFlags) - return; - - SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries; - M.getModuleFlagsMetadata(FlagEntries); - bool Updated = false; - for (auto &Flag : FlagEntries) { - // llvm 3.7 only supports behavior up to AppendUnique. 
- if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique) - continue; - Flag.Behavior = Module::ModFlagBehavior::Warning; - Updated = true; - } - - if (!Updated) - return; - - MDFlags->eraseFromParent(); - - for (auto &Flag : FlagEntries) - M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val); -} - class DXILPrepareModule : public ModulePass { static Value *maybeGenerateBitcast(IRBuilder<> &Builder, @@ -202,15 +175,6 @@ class DXILPrepareModule : public ModulePass { Builder.getPtrTy(PtrTy->getAddressSpace()))); } - static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) { - return {M.getMDKindID("dx.nonuniform"), - M.getMDKindID("dx.controlflow.hints"), - M.getMDKindID("dx.precise"), - llvm::LLVMContext::MD_range, - llvm::LLVMContext::MD_alias_scope, - llvm::LLVMContext::MD_noalias}; - } - public: bool runOnModule(Module &M) override { PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M); @@ -224,10 +188,7 @@ public: const dxil::ModuleMetadataInfo MetadataInfo = getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata(); VersionTuple ValVer = MetadataInfo.ValidatorVersion; - bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0; - - // construct allowlist of valid metadata node kinds - std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M); + bool AllowExperimental = ValVer.getMajor() == 0 && ValVer.getMinor() == 0; for (auto &F : M.functions()) { F.removeFnAttrs(AttrMask); @@ -235,7 +196,7 @@ public: // Only remove string attributes if we are not skipping validation. // This will reserve the experimental attributes when validation version // is 0.0 for experiment mode. - removeStringFunctionAttributes(F, SkipValidation); + removeStringFunctionAttributes(F, AllowExperimental); for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx) F.removeParamAttrs(Idx, AttrMask); @@ -243,11 +204,17 @@ public: IRBuilder<> Builder(&BB); for (auto &I : make_early_inc_range(BB)) { - I.dropUnknownNonDebugMetadata(DXILCompatibleMDs); + if (auto *CB = dyn_cast<CallBase>(&I)) { + CB->removeFnAttrs(AttrMask); + CB->removeRetAttrs(AttrMask); + for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) + CB->removeParamAttrs(Idx, AttrMask); + continue; + } // Emitting NoOp bitcast instructions allows the ValueEnumerator to be // unmodified as it reserves instruction IDs during construction. - if (auto LI = dyn_cast<LoadInst>(&I)) { + if (auto *LI = dyn_cast<LoadInst>(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, LI->getPointerOperand(), LI->getType())) { @@ -257,7 +224,7 @@ public: } continue; } - if (auto SI = dyn_cast<StoreInst>(&I)) { + if (auto *SI = dyn_cast<StoreInst>(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, SI->getPointerOperand(), SI->getValueOperand()->getType())) { @@ -268,39 +235,16 @@ public: } continue; } - if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, GEP->getPointerOperand(), GEP->getSourceElementType())) GEP->setOperand(0, NoOpBitcast); continue; } - if (auto *CB = dyn_cast<CallBase>(&I)) { - CB->removeFnAttrs(AttrMask); - CB->removeRetAttrs(AttrMask); - for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) - CB->removeParamAttrs(Idx, AttrMask); - continue; - } } } } - // Remove flags not for DXIL.
- cleanModuleFlags(M); - - // dx.rootsignatures will have been parsed from its metadata form as its - // binary form as part of the RootSignatureAnalysisWrapper, so safely - // remove it as it is not recognized in DXIL - if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) - RootSignature->eraseFromParent(); - - // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and - // causes all tests using the DXIL Validator to fail. - // - // This is a temporary fix and should be replaced with a whitelist once - // we have determined all metadata that the DXIL Validator allows - if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) - ErrNo->eraseFromParent(); return true; } @@ -308,11 +252,11 @@ public: DXILPrepareModule() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DXILMetadataAnalysisWrapperPass>(); - AU.addRequired<RootSignatureAnalysisWrapper>(); - AU.addPreserved<RootSignatureAnalysisWrapper>(); - AU.addPreserved<ShaderFlagsAnalysisWrapper>(); + AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); AU.addPreserved<DXILResourceWrapperPass>(); + AU.addPreserved<RootSignatureAnalysisWrapper>(); + AU.addPreserved<ShaderFlagsAnalysisWrapper>(); } static char ID; // Pass identification. }; @@ -323,7 +267,6 @@ char DXILPrepareModule::ID = 0; INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 9eebcc9..1e4797b 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "DXILTranslateMetadata.h" +#include "DXILRootSignature.h" #include "DXILShaderFlags.h" #include "DirectX.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" @@ -204,9 +206,9 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, return MDNode::get(Ctx, MDVals); } -MDTuple *constructEntryMetadata(const Function *EntryFn, MDTuple *Signatures, - MDNode *Resources, MDTuple *Properties, - LLVMContext &Ctx) { +static MDTuple *constructEntryMetadata(const Function *EntryFn, + MDTuple *Signatures, MDNode *Resources, + MDTuple *Properties, LLVMContext &Ctx) { // Each entry point metadata record specifies: // * reference to the entry point function global symbol // * unmangled name @@ -290,42 +292,82 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD, return constructEntryMetadata(nullptr, nullptr, RMD, Properties, Ctx); } -// TODO: We might need to refactor this to be more generic, -// in case we need more metadata to be replaced. 
-static void translateBranchMetadata(Module &M) { - for (Function &F : M) { - for (BasicBlock &BB : F) { - Instruction *BBTerminatorInst = BB.getTerminator(); +static void translateBranchMetadata(Module &M, Instruction *BBTerminatorInst) { + MDNode *HlslControlFlowMD = + BBTerminatorInst->getMetadata("hlsl.controlflow.hint"); + + if (!HlslControlFlowMD) + return; - MDNode *HlslControlFlowMD = - BBTerminatorInst->getMetadata("hlsl.controlflow.hint"); + assert(HlslControlFlowMD->getNumOperands() == 2 && + "invalid operands for hlsl.controlflow.hint"); - if (!HlslControlFlowMD) - continue; + MDBuilder MDHelper(M.getContext()); - assert(HlslControlFlowMD->getNumOperands() == 2 && - "invalid operands for hlsl.controlflow.hint"); + llvm::Metadata *HintsStr = MDHelper.createString("dx.controlflow.hints"); + llvm::Metadata *HintsValue = MDHelper.createConstant( + mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1))); - MDBuilder MDHelper(M.getContext()); - ConstantInt *Op1 = - mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1)); + MDNode *MDNode = llvm::MDNode::get(M.getContext(), {HintsStr, HintsValue}); - SmallVector<llvm::Metadata *, 2> Vals( - ArrayRef<Metadata *>{MDHelper.createString("dx.controlflow.hints"), - MDHelper.createConstant(Op1)}); + BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode); + BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr); +} + +static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) { + return { + M.getMDKindID("dx.nonuniform"), M.getMDKindID("dx.controlflow.hints"), + M.getMDKindID("dx.precise"), llvm::LLVMContext::MD_range, + llvm::LLVMContext::MD_alias_scope, llvm::LLVMContext::MD_noalias}; +} - MDNode *MDNode = llvm::MDNode::get(M.getContext(), Vals); +static void translateInstructionMetadata(Module &M) { + // construct allowlist of valid metadata node kinds + std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M); - BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode); - BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr); + for (Function &F : M) { + for (BasicBlock &BB : F) { + // This needs to be done first so that "hlsl.controlflow.hints" isn't + // removed in the whitelist below + if (auto *I = BB.getTerminator()) + translateBranchMetadata(M, I); + + for (auto &I : make_early_inc_range(BB)) { + I.dropUnknownNonDebugMetadata(DXILCompatibleMDs); + } } } } -static void translateMetadata(Module &M, DXILResourceMap &DRM, - DXILResourceTypeMap &DRTM, - const ModuleShaderFlags &ShaderFlags, - const ModuleMetadataInfo &MMDI) { +static void cleanModuleFlags(Module &M) { + NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); + if (!MDFlags) + return; + + SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries; + M.getModuleFlagsMetadata(FlagEntries); + bool Updated = false; + for (auto &Flag : FlagEntries) { + // llvm 3.7 only supports behavior up to AppendUnique. 
+ if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique) + continue; + Flag.Behavior = Module::ModFlagBehavior::Warning; + Updated = true; + } + + if (!Updated) + return; + + MDFlags->eraseFromParent(); + + for (auto &Flag : FlagEntries) + M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val); +} + +static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM, + DXILResourceTypeMap &DRTM, + const ModuleShaderFlags &ShaderFlags, + const ModuleMetadataInfo &MMDI) { LLVMContext &Ctx = M.getContext(); IRBuilder<> IRB(Ctx); SmallVector<MDNode *> EntryFnMDNodes; @@ -381,6 +423,22 @@ static void translateMetadata(Module &M, DXILResourceMap &DRM, M.getOrInsertNamedMetadata("dx.entryPoints"); for (auto *Entry : EntryFnMDNodes) EntryPointsNamedMD->addOperand(Entry); + + cleanModuleFlags(M); + + // dx.rootsignatures will have been parsed from its metadata form into its + // binary form as part of the RootSignatureAnalysisWrapper, so safely + // remove it as it is not recognized in DXIL + if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) + RootSignature->eraseFromParent(); + + // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and + // causes all tests using the DXIL Validator to fail. + // + // This is a temporary fix and should be replaced with an allowlist once + // we have determined all metadata that the DXIL Validator allows + if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) + ErrNo->eraseFromParent(); } PreservedAnalyses DXILTranslateMetadata::run(Module &M, @@ -390,8 +448,8 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M, const ModuleShaderFlags &ShaderFlags = MAM.getResult<ShaderFlagsAnalysis>(M); const dxil::ModuleMetadataInfo MMDI = MAM.getResult<DXILMetadataAnalysis>(M); - translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI); - translateBranchMetadata(M); + translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI); + translateInstructionMetadata(M); return PreservedAnalyses::all(); } @@ -409,10 +467,13 @@ public: AU.addRequired<DXILResourceWrapperPass>(); AU.addRequired<ShaderFlagsAnalysisWrapper>(); AU.addRequired<DXILMetadataAnalysisWrapperPass>(); - AU.addPreserved<DXILResourceWrapperPass>(); + AU.addRequired<RootSignatureAnalysisWrapper>(); + AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); - AU.addPreserved<ShaderFlagsAnalysisWrapper>(); AU.addPreserved<DXILResourceBindingWrapperPass>(); + AU.addPreserved<DXILResourceWrapperPass>(); + AU.addPreserved<RootSignatureAnalysisWrapper>(); + AU.addPreserved<ShaderFlagsAnalysisWrapper>(); } bool runOnModule(Module &M) override { @@ -425,8 +486,8 @@ public: dxil::ModuleMetadataInfo MMDI = getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata(); - translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI); - translateBranchMetadata(M); + translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI); + translateInstructionMetadata(M); return true; } }; @@ -443,6 +504,7 @@ INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) +INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h index f3f5eb1..4c1ffac
100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h @@ -13,7 +13,8 @@ namespace llvm { -/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes +/// A pass that transforms LLVM Metadata in the module to its DXIL equivalent, +/// then emits all recognized DXIL Metadata class DXILTranslateMetadata : public PassInfoMixin<DXILTranslateMetadata> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index fb0928b8..ede8463 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -79,6 +79,12 @@ def ExtensionHVXV79: SubtargetFeature<"hvxv79", "HexagonHVXVersion", ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, ExtensionHVXV73, ExtensionHVXV75]>; +def ExtensionHVXV81: SubtargetFeature<"hvxv81", "HexagonHVXVersion", + "Hexagon::ArchEnum::V81", "Hexagon HVX instructions", + [ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, + ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, + ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79]>; + def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>; def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps", @@ -151,6 +157,8 @@ def UseHVXV75 : Predicate<"HST->useHVXV75Ops()">, AssemblerPredicate<(all_of ExtensionHVXV75)>; def UseHVXV79 : Predicate<"HST->useHVXV79Ops()">, AssemblerPredicate<(all_of ExtensionHVXV79)>; +def UseHVXV81 : Predicate<"HST->useHVXV81Ops()">, + AssemblerPredicate<(all_of ExtensionHVXV81)>; def UseAudio : Predicate<"HST->useAudioOps()">, AssemblerPredicate<(all_of ExtensionAudio)>; def UseZReg : Predicate<"HST->useZRegOps()">, @@ -488,6 +496,11 @@ def : Proc<"hexagonv79", HexagonModelV79, ArchV68, ArchV69, ArchV71, ArchV73, ArchV75, ArchV79, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; +def : Proc<"hexagonv81", HexagonModelV81, + [ArchV65, ArchV66, ArchV67, ArchV68, ArchV69, ArchV71, ArchV73, + ArchV75, ArchV79, ArchV81, + FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; // Need to update the correct features for tiny core.
// Disable NewValueJumps since the packetizer is unable to handle a packet with diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index 8984534..9bf4034 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -29,7 +29,8 @@ enum class ArchEnum { V71, V73, V75, - V79 + V79, + V81 }; inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) { @@ -50,6 +51,7 @@ inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) { .Case("hexagonv73", Hexagon::ArchEnum::V73) .Case("hexagonv75", Hexagon::ArchEnum::V75) .Case("hexagonv79", Hexagon::ArchEnum::V79) + .Case("hexagonv81", Hexagon::ArchEnum::V81) .Default(std::nullopt); } } // namespace Hexagon diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td index 8ec1d93..f623fd0 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.td +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td @@ -34,3 +34,5 @@ def ArchV75: SubtargetFeature<"v75", "HexagonArchVersion", "Hexagon::ArchEnum::V def HasV75 : Predicate<"HST->hasV75Ops()">, AssemblerPredicate<(all_of ArchV75)>; def ArchV79: SubtargetFeature<"v79", "HexagonArchVersion", "Hexagon::ArchEnum::V79", "Enable Hexagon V79 architecture">; def HasV79 : Predicate<"HST->hasV79Ops()">, AssemblerPredicate<(all_of ArchV79)>; +def ArchV81: SubtargetFeature<"v81", "HexagonArchVersion", "Hexagon::ArchEnum::V81", "Enable Hexagon V81 architecture">; +def HasV81 : Predicate<"HST->hasV81Ops()">, AssemblerPredicate<(all_of ArchV81)>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index 93696e0..f4e36fa7 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -7222,3 +7222,595 @@ class DepHVXItinV79 { [Hex_FWD, Hex_FWD, HVX_FWD]> ]; } + +class DepHVXItinV81 { + list<InstrItinData> DepHVXItinV81_list = [ + InstrItinData <tc_0390c1ca, /*SLOT01,LOAD,VA,VX_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, 
+ InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_227864f7, /*SLOT0,STORE,VA,VX_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_37820f4c, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_3904b926, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
<tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_3ce09744, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_4942646a, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_531b383c, /*SLOT0123*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_540c3da3, /*SLOT0,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_56e64202, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + 
[HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_649072c2, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_7177e272, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_72e2b393, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_73efe966, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_7417e785, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_8772086c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/ 
+ [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/ + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_946013d8, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_9a1cab75, /*SLOT01,LOAD,VA,VX_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9aff7a2a, /*SLOT0,STORE,VA,VX_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a19b9305, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ab23f776, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ac4046bc, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, 
CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_b091f1c6, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c127de3a, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_c4edf264, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_cda936da, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_dcca380f, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_e2fdd6e6, /*SLOT0123*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData 
<tc_e35c1e93, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e699ae41, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_f175e046, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/ + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +}
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td index 7a1ad3e..48b665c 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -13740,3 +13740,891 @@ class DepScalarItinV79 { [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> ]; } + +class DepScalarItinV81 { + list<InstrItinData> DepScalarItinV81_list = [ + InstrItinData <tc_011e0e9d, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_01d44cb2, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_01e1be3b, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_02fe1c65, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0655b949, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_075c8dd8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0a195f2c, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0a43be35, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_0a6c20ae, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0ba0d5da, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_0dfac0a7, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0fac1eb8, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_112d30d6, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_1242dc2a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_1248597c, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_139ef484, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_14ab4f41, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_151bf368, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_158aa3f7, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_197dce51, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1981450d, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_1c2c7a4a, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1c7522a8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1d41f8b7, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1fcb8495, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1fe4ab69, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_20131976, 
/*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2237d952, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_23708a21, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], + []>, + + InstrItinData <tc_2471c1c8, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_24e109c7, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_24f426ab, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_27106296, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_280f7fe1, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_28e55c6f, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2c13e7f5, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2c3e17fc, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_2f573607, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_33e7e673, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_362b0be2, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_38382228, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_388f9897, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_38e0bae9, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3d14a17b, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3edca78f, /*tc_2*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3fbf1042, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_407e96f9, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_40d64c94, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4222e6bf, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_42ff66ba, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_442395f3, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_449acf79, /*tc_latepredstaia*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_44d5a428, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_44fffc58, /*tc_3*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_45791fb8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_45f9d1be, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_46c18ecf, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_49fdfd4b, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 
1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4a55d03c, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4abdbdc6, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4ac61d92, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4bf903b0, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_503ce0f3, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_512b1653, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_53c851ab, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_54f0cee2, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_5502c366, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55255f2b, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [], + []>, + + InstrItinData <tc_556f6577, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55a9a350, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55b33fda, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_56a124a7, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_57a55b54, /*tc_1*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5944960d, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_59a7822c, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5a222e89, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5a4b5e58, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5b347363, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5ceb2f9e, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5da50c4b, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5deb5e47, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5e4cf0e8, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_60e324ff, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_63567288, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4], + [Hex_FWD]>, + + InstrItinData <tc_64b00d8a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_651cbe02, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_65279839, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_65cbd974, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, 
Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_69bfb303, /*tc_3*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6aa823ab, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6ae3426b, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6d861a95, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6e20402a, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6f42bc60, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6fb52018, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6fc5dbea, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_711c805f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_713b66bf, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7401744f, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7476d766, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_74a42bda, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_759e57be, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_76bb5435, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7d6a2568, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_77f94a5e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_788b1d09, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_78f87ed3, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_7af3a37e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7b9187d3, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7c28bd7e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_7c31e19a, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7c6d32e4, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7dc63b5c, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7f58404a, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [], + []>, + + InstrItinData <tc_7f7f45f5, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7f8ae742, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8035e91f, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_822c3c68, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_829d8a86, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_838c4d7a, 
/*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_84a7500d, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_86173609, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_887d1bb7, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8a6d0d94, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8a825db2, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8b5bd4f5, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8e82e8ca, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8f36a2fd, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9124c04f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_92240447, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_934753bb, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_937dd41c, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [], + []>, + + InstrItinData <tc_9406230a, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_95a33176, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_95f43c5e, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_96ef76ef, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_975a4e54, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9783714b, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9b20a062, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9b34f5e0, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_9b3c0462, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9bcfb2ee, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9c52f549, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9e27f2f9, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9e72dc89, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9edb7c77, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9edefe01, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9f6cd987, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a08b630b, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, 
Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a1297125, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a154b476, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a2b365d2, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a3070909, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a32e03e7, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a38c45dc, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a4e22bbd, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a4ee89db, /*tc_2early*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_a724463d, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a7a13fac, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a7bdb22c, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a9edeffa, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_abfd9a6d, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ac65613f, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_addc37a8, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ae5babd7, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_aee6250c, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_af6af259, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b1ae5f67, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_b2196a3f, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b3d46584, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_b4dc7630, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b7c4062a, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b837298f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], + []>, + + InstrItinData <tc_b9bec29e, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_ba9255a6, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb07f2c5, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb78483e, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb831a7c, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bf2ffc0f, /*tc_ld*/ + [InstrStage<1, [SLOT0, 
SLOT1]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c20701f0, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c21d7447, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c57d9f39, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c818ff7f, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_ce59038e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_cfa0e29b, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d03278fd, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d234b61a, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_d33e5eee, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d3632d88, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d45ba9cd, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_d57d649c, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_d61dfdc3, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d68dca5c, /*tc_3stall*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d71ea8fa, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d7718fbe, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_db596beb, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_db96aa6b, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_dc51281d, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_decdde8a, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_df5d53f9, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e3d699e3, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e60def48, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_e9170fb7, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ed03645c, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ed3f8d2a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_eed07714, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_eeda4109, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ef921005, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f098b237, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f0cdeccf, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f0e8e832, /*tc_4x*/ + [InstrStage<1, [SLOT2, 
SLOT3]>], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f34c1c21, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f38f92e1, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_f529831b, /*tc_latepredstaia*/ + [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f6e2aff9, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f7569068, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f97707c1, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_f999c66e, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_fae9dfa5, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_fedb7e19, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +}
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index ae96753..f8f1c2a 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -39178,6 +39178,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_hf_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011010000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_qf16 : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -39269,6 +39282,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_sf_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011010000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_sf_sf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -41116,6 +41142,17 @@ let hasNewValue = 1; let opNewValue = 0; let isSolo = 1; } +def Y2_tlbpp : HInst< +(outs IntRegs:$Rd32), +(ins DoubleRegs:$Rss32), +"$Rd32 = tlbp($Rss32)", +tc_6aa823ab, TypeCR>, Enc_90cd8b, Requires<[HasV81]> { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01101100011; +let hasNewValue = 1; +let opNewValue = 0; +let isSolo = 1; +} def Y2_tlbr : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 17cb96c..23f4b3a 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3827,3 +3827,14 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX128B]>; + +// V81 HVX Instructions. 
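+// Note: each new intrinsic below has a 64B and a 128B variant, and both
+// select to the same V6_vsub_*_mix instruction; only the vector-length
+// predicate (UseHVX64B vs. UseHVX128B) in the Requires clause differs.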
+ +def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index e285e04..7ee280d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -654,7 +654,9 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { IntNo == Intrinsic::hexagon_V6_vgathermh || IntNo == Intrinsic::hexagon_V6_vgathermh_128B || IntNo == Intrinsic::hexagon_V6_vgathermhw || - IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) { + IntNo == Intrinsic::hexagon_V6_vgathermhw_128B || + IntNo == Intrinsic::hexagon_V6_vgather_vscattermh || + IntNo == Intrinsic::hexagon_V6_vgather_vscattermh_128B) { SelectV65Gather(N); return; } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index c7a4f68..3cc146b 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -2953,6 +2953,10 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { case Intrinsic::hexagon_V6_vgathermhw_128B: Opcode = Hexagon::V6_vgathermhw_pseudo; break; + case Intrinsic::hexagon_V6_vgather_vscattermh: + case Intrinsic::hexagon_V6_vgather_vscattermh_128B: + Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo; + break; } SDVTList VTs = CurDAG->getVTList(MVT::Other); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 9f7f434..526b4de 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2145,7 +2145,9 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::hexagon_V6_vgathermhq: case Intrinsic::hexagon_V6_vgathermhq_128B: case Intrinsic::hexagon_V6_vgathermhwq: - case Intrinsic::hexagon_V6_vgathermhwq_128B: { + case Intrinsic::hexagon_V6_vgathermhwq_128B: + case Intrinsic::hexagon_V6_vgather_vscattermh: + case Intrinsic::hexagon_V6_vgather_vscattermh_128B: { const Module &M = *I.getParent()->getParent()->getParent(); Info.opc = ISD::INTRINSIC_W_CHAIN; Type *VecTy = I.getArgOperand(1)->getType(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 939841a..47726d6 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1554,80 +1554,93 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const { MachineBasicBlock::iterator First; switch (Opc) { - case Hexagon::V6_vgathermh_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - 
- case Hexagon::V6_vgathermw_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhw_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermwq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhwq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); + case Hexagon::V6_vgather_vscatter_mh_pseudo: + // This is mainly a placeholder. It will be extended.
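+    // Expansion sketch (current placeholder semantics): the V6_vgathermh
+    // below writes the implicit VTMP register, and the paired V6_vscattermh
+    // then stores VTMP back out through the same address operands (base,
+    // modifier, offset), unlike the plain gather pseudos, which store VTMP
+    // with V6_vS32b_new_ai.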
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vscattermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + case Hexagon::V6_vgathermh_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermw_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhw_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermwq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhwq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); } return MI.getIterator(); @@ -2806,6 +2819,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::V6_vL32b_nt_tmp_npred_ai: case Hexagon::V6_vS32Ub_npred_ai: case Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgather_vscatter_mh_pseudo: case Hexagon::V6_vgathermw_pseudo: case Hexagon::V6_vgathermhw_pseudo: case Hexagon::V6_vgathermhq_pseudo: diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td index f927f9b..42393d0 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td @@ -40,6 +40,19 @@ defm V6_vgathermh_pseudo : vgathermh<HvxVR>; defm V6_vgathermw_pseudo : vgathermw<HvxVR>; defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>; + +multiclass vgather_scatter_mh<RegisterClass RC> { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, + mayStore = 1, addrMode = BaseImmOffset, 
accessSize = HalfWordAccess in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), + ".error \"should not emit\" ", + []>; +} + +defm V6_vgather_vscatter_mh_pseudo : vgather_scatter_mh<HvxVR>; + multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td index b8a9cf3..9bcd4bf 100644 --- a/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -75,3 +75,4 @@ include "HexagonScheduleV71T.td" include "HexagonScheduleV73.td" include "HexagonScheduleV75.td" include "HexagonScheduleV79.td" +include "HexagonScheduleV81.td"
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV81.td b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td new file mode 100644 index 0000000..dd5f5a0 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td @@ -0,0 +1,31 @@ +//=-HexagonScheduleV81.td - HexagonV81 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def HexagonV81ItinList : DepScalarItinV81, ScalarItin, + DepHVXItinV81, HVXItin, PseudoItin { + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV81_list, ScalarItin_list, + DepHVXItinV81_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV81 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV81ItinList.ItinList>; + +def HexagonModelV81 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV81; + let LoadLatency = 1; + let CompleteModel = 0; +} diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 7430567..995f66d 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -224,6 +224,15 @@ public: bool useHVXV79Ops() const { return HexagonHVXVersion >= Hexagon::ArchEnum::V79; } + bool hasV81Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V81; + } + bool hasV81OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V81; + } + bool useHVXV81Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V81; + } bool useAudioOps() const { return UseAudioOps; } bool useCompound() const { return UseCompound; } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 171e294..e925e04 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -31,6 +31,10 @@ using namespace llvm; static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); +cl::opt<bool> HexagonAllowScatterGatherHVX( + "hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden, + cl::desc("Allow auto-generation of HVX scatter-gather")); + static cl::opt<bool> EnableV68FloatAutoHVX( "force-hvx-float", cl::Hidden, cl::desc("Enable auto-vectorization of floatint point types on v68.")); @@ -354,6 +358,61 @@ bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/, return HexagonMaskedVMem && ST.isTypeForHVX(DataType); } +bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const { + // For now assume we can not deal with all HVX datatypes. + if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) || + !HexagonAllowScatterGatherHVX) + return false; + // This must be in sync with HexagonVectorCombine pass. 
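+  // Shapes accepted below: 128 x i8, 64 or 32 x i16 with at least 2-byte
+  // alignment, and 32 x i32 with at least 4-byte alignment; everything
+  // else falls through and is rejected.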
+ switch (Ty->getScalarSizeInBits()) { + case 8: + return (getTypeNumElements(Ty) == 128); + case 16: + if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32) + return (Alignment >= 2); + break; + case 32: + if (getTypeNumElements(Ty) == 32) + return (Alignment >= 4); + break; + default: + break; + } + return false; +} + +bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const { + if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) || + !HexagonAllowScatterGatherHVX) + return false; + // This must be in sync with HexagonVectorCombine pass. + switch (Ty->getScalarSizeInBits()) { + case 8: + return (getTypeNumElements(Ty) == 128); + case 16: + if (getTypeNumElements(Ty) == 64) + return (Alignment >= 2); + break; + case 32: + if (getTypeNumElements(Ty) == 32) + return (Alignment >= 4); + break; + default: + break; + } + return false; +} + +bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy, + Align Alignment) const { + return !isLegalMaskedGather(VTy, Alignment); +} + +bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy, + Align Alignment) const { + return !isLegalMaskedScatter(VTy, Alignment); +} + /// --- Vector TTI end --- unsigned HexagonTTIImpl::getPrefetchDistance() const { diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index dbf16c9..cec2bf9 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -169,6 +169,12 @@ public: unsigned AddressSpace) const override; bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace) const override; + bool isLegalMaskedGather(Type *Ty, Align Alignment) const override; + bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override; + bool forceScalarizeMaskedGather(VectorType *VTy, + Align Alignment) const override; + bool forceScalarizeMaskedScatter(VectorType *VTy, + Align Alignment) const override; /// @} diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 9ab5202..5c50ec2 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -57,6 +57,11 @@ #define DEBUG_TYPE "hexagon-vc" +// This is a const that represents default HVX VTCM page size. 
+// It is boot time configurable, so we probably want an API to +// read it, but for now assume 128KB +#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072 + using namespace llvm; namespace { @@ -418,6 +423,18 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { class HvxIdioms { public: + enum DstQualifier { + Undefined = 0, + Arithmetic, + LdSt, + LLVM_Gather, + LLVM_Scatter, + HEX_Gather_Scatter, + HEX_Gather, + HEX_Scatter, + Call + }; + HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) { auto *Int32Ty = HVC.getIntTy(32); HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false); @@ -473,6 +490,11 @@ private: auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX, Signedness SgnX, ArrayRef<Value *> WordY, Signedness SgnY) const -> SmallVector<Value *>; + // Vector manipulations for Ripple + bool matchScatter(Instruction &In) const; + bool matchGather(Instruction &In) const; + Value *processVScatter(Instruction &In) const; + Value *processVGather(Instruction &In) const; VectorType *HvxI32Ty; VectorType *HvxP32Ty; @@ -1545,7 +1567,7 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool { } auto AlignVectors::run() -> bool { - LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName() + LLVM_DEBUG(dbgs() << "\nRunning HVC::AlignVectors on " << HVC.F.getName() << '\n'); if (!createAddressGroups()) return false; @@ -1797,6 +1819,846 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const return Ext; } +inline bool HvxIdioms::matchScatter(Instruction &In) const { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&In); + if (!II) + return false; + return (II->getIntrinsicID() == Intrinsic::masked_scatter); +} + +inline bool HvxIdioms::matchGather(Instruction &In) const { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&In); + if (!II) + return false; + return (II->getIntrinsicID() == Intrinsic::masked_gather); +} + +Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual); + +// Binary instructions we want to handle as users of gather/scatter. +inline bool isArithmetic(unsigned Opc) { + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::Shl: + case Instruction::UDiv: + return true; + } + return false; +} + +// TODO: Maybe use MemoryLocation for this. See getLocOrNone above. 
+inline Value *getPointer(Value *Ptr) { + assert(Ptr && "Unable to extract pointer"); + if (isa<AllocaInst>(Ptr) || isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) + return Ptr; + if (isa<LoadInst>(Ptr) || isa<StoreInst>(Ptr)) + return getLoadStorePointerOperand(Ptr); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) { + if (II->getIntrinsicID() == Intrinsic::masked_store) + return II->getOperand(1); + } + return nullptr; +} + +static Instruction *selectDestination(Instruction *In, + HvxIdioms::DstQualifier &Qual) { + Instruction *Destination = nullptr; + if (!In) + return Destination; + if (isa<StoreInst>(In)) { + Destination = In; + Qual = HvxIdioms::LdSt; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) { + if (II->getIntrinsicID() == Intrinsic::masked_gather) { + Destination = In; + Qual = HvxIdioms::LLVM_Gather; + } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) { + Destination = In; + Qual = HvxIdioms::LLVM_Scatter; + } else if (II->getIntrinsicID() == Intrinsic::masked_store) { + Destination = In; + Qual = HvxIdioms::LdSt; + } else if (II->getIntrinsicID() == + Intrinsic::hexagon_V6_vgather_vscattermh) { + Destination = In; + Qual = HvxIdioms::HEX_Gather_Scatter; + } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) { + Destination = In; + Qual = HvxIdioms::HEX_Scatter; + } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) { + Destination = In; + Qual = HvxIdioms::HEX_Gather; + } + } else if (isa<ZExtInst>(In)) { + return locateDestination(In, Qual); + } else if (isa<CastInst>(In)) { + return locateDestination(In, Qual); + } else if (isa<CallInst>(In)) { + Destination = In; + Qual = HvxIdioms::Call; + } else if (isa<GetElementPtrInst>(In)) { + return locateDestination(In, Qual); + } else if (isArithmetic(In->getOpcode())) { + Destination = In; + Qual = HvxIdioms::Arithmetic; + } else { + LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n"); + } + return Destination; +} + +// This method attempts to find the destination (user) for a given intrinsic. +// Given that these are produced only by Ripple, the number of options is +// limited. The simplest case is an explicit store, which is in fact redundant +// (since the HVX gather creates its own store during packetization). +// Nevertheless we need to figure out the address where we are storing. Other +// cases are more complicated, but still few. +Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual) { + Instruction *Destination = nullptr; + if (!In) + return Destination; + // Get all possible destinations + SmallVector<Instruction *> Users; + // Iterate over the uses of the instruction + for (auto &U : In->uses()) { + if (auto *UI = dyn_cast<Instruction>(U.getUser())) { + Destination = selectDestination(UI, Qual); + if (Destination) + Users.push_back(Destination); + } + } + // Now see which of the users (if any) is a memory destination. + for (auto *I : Users) + if (getPointer(I)) + return I; + return Destination; +}
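+ +// For example, for the typical Ripple-produced pattern +// %v = call <64 x i16> @llvm.masked.gather...(...) +// store <64 x i16> %v, ptr %dst +// selectDestination qualifies the store as HvxIdioms::LdSt, and the final +// scan above prefers it because getPointer can recover a memory address +// from it.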
+ +// The two intrinsics we handle here have GEP in a different position. +inline GetElementPtrInst *locateGepFromIntrinsic(Instruction *In) { + assert(In && "Bad instruction"); + IntrinsicInst *IIn = dyn_cast<IntrinsicInst>(In); + assert((IIn && (IIn->getIntrinsicID() == Intrinsic::masked_gather || + IIn->getIntrinsicID() == Intrinsic::masked_scatter)) && + "Not a gather Intrinsic"); + GetElementPtrInst *GEPIndex = nullptr; + if (IIn->getIntrinsicID() == Intrinsic::masked_gather) + GEPIndex = dyn_cast<GetElementPtrInst>(IIn->getOperand(0)); + else + GEPIndex = dyn_cast<GetElementPtrInst>(IIn->getOperand(1)); + return GEPIndex; +} + +// Given the intrinsic, find its GEP argument and extract the base address it +// uses. The method relies on the way Ripple typically forms the GEP for +// scatter/gather. +static Value *locateAddressFromIntrinsic(Instruction *In) { + GetElementPtrInst *GEPIndex = locateGepFromIntrinsic(In); + if (!GEPIndex) { + LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n"); + return nullptr; + } + Value *BaseAddress = GEPIndex->getPointerOperand(); + auto *IndexLoad = dyn_cast<LoadInst>(BaseAddress); + if (IndexLoad) + return IndexLoad; + + auto *IndexZEx = dyn_cast<ZExtInst>(BaseAddress); + if (IndexZEx) { + IndexLoad = dyn_cast<LoadInst>(IndexZEx->getOperand(0)); + if (IndexLoad) + return IndexLoad; + IntrinsicInst *II = dyn_cast<IntrinsicInst>(IndexZEx->getOperand(0)); + if (II && II->getIntrinsicID() == Intrinsic::masked_gather) + return locateAddressFromIntrinsic(II); + } + auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(BaseAddress); + if (BaseShuffle) { + IndexLoad = dyn_cast<LoadInst>(BaseShuffle->getOperand(0)); + if (IndexLoad) + return IndexLoad; + auto *IE = dyn_cast<InsertElementInst>(BaseShuffle->getOperand(0)); + if (IE) { + auto *Src = IE->getOperand(1); + IndexLoad = dyn_cast<LoadInst>(Src); + if (IndexLoad) + return IndexLoad; + auto *Alloca = dyn_cast<AllocaInst>(Src); + if (Alloca) + return Alloca; + if (isa<Argument>(Src)) { + return Src; + } + if (isa<GlobalValue>(Src)) { + return Src; + } + } + } + LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n"); + return nullptr; +} + +static Type *getIndexType(Value *In) { + if (!In) + return nullptr; + + if (isa<LoadInst>(In) || isa<StoreInst>(In)) + return getLoadStoreType(In); + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) { + if (II->getIntrinsicID() == Intrinsic::masked_load) + return II->getType(); + if (II->getIntrinsicID() == Intrinsic::masked_store) + return II->getOperand(0)->getType(); + } + return In->getType(); +} + +static Value *locateIndexesFromGEP(Value *In) { + if (!In) + return nullptr; + if (isa<LoadInst>(In)) + return In; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) { + if (II->getIntrinsicID() == Intrinsic::masked_load) + return In; + if (II->getIntrinsicID() == Intrinsic::masked_gather) + return In; + } + if (auto *IndexZEx = dyn_cast<ZExtInst>(In)) + return locateIndexesFromGEP(IndexZEx->getOperand(0)); + if (auto *IndexSEx = dyn_cast<SExtInst>(In)) + return locateIndexesFromGEP(IndexSEx->getOperand(0)); + if (auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(In)) + return locateIndexesFromGEP(BaseShuffle->getOperand(0)); + if (auto *IE = dyn_cast<InsertElementInst>(In)) + return locateIndexesFromGEP(IE->getOperand(1)); + if (auto *cstDataVector = dyn_cast<ConstantDataVector>(In)) + return cstDataVector; + if (auto *GEPIndex = dyn_cast<GetElementPtrInst>(In)) + return GEPIndex->getOperand(0); + return nullptr; +}
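+ +// For example, a Ripple-formed address vector such as +// %idx = load <64 x i16>, ptr %p +// %ext = zext <64 x i16> %idx to <64 x i32> +// %gep = getelementptr i16, ptr %base, <64 x i32> %ext +// is peeled one step at a time: locateIndexesFromGEP strips the zext and +// returns the load that produces the index vector.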
+ +// Given the intrinsic, find its GEP argument and extract the offsets from the +// base address it uses. +static Value *locateIndexesFromIntrinsic(Instruction *In) { + GetElementPtrInst *GEPIndex = locateGepFromIntrinsic(In); + if (!GEPIndex) { + LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n"); + return nullptr; + } + Value *Indexes = GEPIndex->getOperand(1); + if (auto *IndexLoad = locateIndexesFromGEP(Indexes)) + return IndexLoad; + + LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n"); + return nullptr; +} + +// Because of the awkward definition of many Hexagon intrinsics, we often have +// to reinterpret native HVX <64 x i16> as <32 x i32>, which in practice is a +// NOP for all use cases, so this only exists to make the IR builder happy. +inline Value *getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC, + IRBuilderBase &Builder, + LLVMContext &Ctx, Value *I) { + assert(I && "Unable to reinterpret cast"); + Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false); + std::vector<unsigned> shuffleMask; + for (unsigned i = 0; i < 64; ++i) + shuffleMask.push_back(i); + Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask); + Value *CastShuffle = + Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle"); + return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32"); +} + +// Recast <128 x i8> as <32 x i32> +inline Value *getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC, + IRBuilderBase &Builder, + LLVMContext &Ctx, Value *I) { + assert(I && "Unable to reinterpret cast"); + Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false); + std::vector<unsigned> shuffleMask; + for (unsigned i = 0; i < 128; ++i) + shuffleMask.push_back(i); + Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask); + Value *CastShuffle = + Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle"); + return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32"); +} + +// Create <32 x i32> mask reinterpreted as <128 x i1> with a given pattern +inline Value *get_i32_Mask(const HexagonVectorCombine &HVC, + IRBuilderBase &Builder, LLVMContext &Ctx, + unsigned int pattern) { + std::vector<unsigned int> byteMask; + for (unsigned i = 0; i < 32; ++i) + byteMask.push_back(pattern); + + return Builder.CreateIntrinsic( + HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt), + {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)}, + nullptr); +} + +Value *HvxIdioms::processVScatter(Instruction &In) const { + auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType()); + assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather"); + unsigned InpSize = HVC.getSizeOf(InpTy); + auto *F = In.getFunction(); + LLVMContext &Ctx = F->getContext(); + auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType()); + assert(ElemTy && "llvm.scatter needs integer type argument"); + unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy); + LLVM_DEBUG({ + unsigned Elements = HVC.length(InpTy); + dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n"; + dbgs() << " Input type(" << *InpTy << ") elements(" << Elements + << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth(" + << ElemWidth << ")\n"; + }); + + IRBuilder Builder(In.getParent(), In.getIterator(), + InstSimplifyFolder(HVC.DL)); + + auto *ValueToScatter = In.getOperand(0); + LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n"); + + if (HVC.HST.getVectorLength() != InpSize) { + LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize + << ") for vscatter\n"); + return nullptr; + } +
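+ // From here on we recover the three pieces a V6_vscatterm* operation + // needs: the VTCM base address, the offset vector, and the values to + // scatter. The base and the offsets are reverse-engineered from the GEP + // feeding the intrinsic by the locate* helpers above.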
+ // Base address of indexes. + auto *IndexLoad = locateAddressFromIntrinsic(&In); + if (!IndexLoad) + return nullptr; + LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n"); + + // Address of destination. Must be in VTCM. + auto *Ptr = getPointer(IndexLoad); + if (!Ptr) + return nullptr; + LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n"); + // Indexes/offsets + auto *Indexes = locateIndexesFromIntrinsic(&In); + if (!Indexes) + return nullptr; + LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n"); + Value *CastedDst = Builder.CreateBitOrPointerCast(Ptr, Type::getInt32Ty(Ctx), + "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n"); + // Adjust Indexes + auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes); + Value *CastIndex = nullptr; + if (cstDataVector) { + // Our indexes are represented as a constant. We need it in a reg. + AllocaInst *IndexesAlloca = + Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false)); + [[maybe_unused]] auto *StoreIndexes = + Builder.CreateStore(cstDataVector, IndexesAlloca); + LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n"); + CastIndex = Builder.CreateLoad(IndexesAlloca->getAllocatedType(), + IndexesAlloca, "reload_index"); + } else { + if (ElemWidth == 2) + CastIndex = getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes); + else + CastIndex = Indexes; + } + LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n"); + + if (ElemWidth == 1) { + // v128i8: There is no native instruction for this. + // Do this as two Hi/Lo scatters with masking. + Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false); + // Extend indexes. We assume that indexes are in 128i8 format - need to + // expand them to Hi/Lo 64i16 + Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32"); + auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub); + auto *UnpackedIndexes = Builder.CreateIntrinsic( + HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr); + LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n"); + + auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi); + auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo); + [[maybe_unused]] Value *IndexHi = + HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes); + [[maybe_unused]] Value *IndexLo = + HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes); + LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n"); + LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n"); + // Now unpack values to scatter + Value *CastSrc = + getReinterpretiveCast_i8_to_i32(HVC, Builder, Ctx, ValueToScatter); + LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n"); + auto *UnpackedValueToScatter = Builder.CreateIntrinsic( + HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr); + LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter + << ")\n"); + + [[maybe_unused]] Value *UVSHi = + HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter); + [[maybe_unused]] Value *UVSLo = + HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter); + LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n"); + LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n"); + + // Create the mask for individual bytes + auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff); + LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n"); + [[maybe_unused]] auto *ResHi = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B, + {QByteMask, CastedDst,
HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + IndexHi, UVSHi}, + nullptr); + LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n"); + return Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B, + {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + IndexLo, UVSLo}, + nullptr); + } else if (ElemWidth == 2) { + Value *CastSrc = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, ValueToScatter); + LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n"); + return Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B, + {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex, + CastSrc}, + nullptr); + } else if (ElemWidth == 4) { + return Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B, + {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex, + ValueToScatter}, + nullptr); + } else { + LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n"); + return nullptr; + } +} + +Value *HvxIdioms::processVGather(Instruction &In) const { + [[maybe_unused]] auto *InpTy = + dyn_cast<VectorType>(In.getOperand(0)->getType()); + assert(InpTy && "Cannot handle no vector type for llvm.gather"); + [[maybe_unused]] auto *ElemTy = + dyn_cast<PointerType>(InpTy->getElementType()); + assert(ElemTy && "llvm.gather needs vector of ptr argument"); + auto *F = In.getFunction(); + LLVMContext &Ctx = F->getContext(); + LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n" + << *In.getParent() << "\n"); + LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements(" + << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy) + << ") type(" << *ElemTy << ") Access alignment(" + << *In.getOperand(1) << ") AddressSpace(" + << ElemTy->getAddressSpace() << ")\n"); + + // TODO: Handle masking of elements. + assert(dyn_cast<VectorType>(In.getOperand(2)->getType()) && + "llvm.gather needs vector for mask"); + IRBuilder Builder(In.getParent(), In.getIterator(), + InstSimplifyFolder(HVC.DL)); + + // See who is using the result. The difference between LLVM and HVX vgather + // Intrinsic makes it impossible to handle all cases with temp storage. Alloca + // in VTCM is not yet supported, so for now we just bail out for those cases. + HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined; + Instruction *Dst = locateDestination(&In, Qual); + if (!Dst) { + LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n"); + return nullptr; + } + LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual + << ")\n"); + + // Address of destination. Must be in VTCM. + auto *Ptr = getPointer(Dst); + if (!Ptr) { + LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n"); + return nullptr; + } + + // Result type. Assume it is a vector type. + auto *DstType = cast<VectorType>(getIndexType(Dst)); + assert(DstType && "Cannot handle non vector dst type for llvm.gather"); + + // Base address for sources to be loaded + auto *IndexLoad = locateAddressFromIntrinsic(&In); + if (!IndexLoad) + return nullptr; + LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n"); + + // Gather indexes/offsets + auto *Indexes = locateIndexesFromIntrinsic(&In); + if (!Indexes) + return nullptr; + LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n"); + + Instruction *Gather = nullptr; + Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false); + if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) { + // We fully assume the address space is in VTCM. 
We also assume that all + // pointers in Operand(0) have the same base(!). + // This is the most basic case of all the above. + unsigned OutputSize = HVC.getSizeOf(DstType); + auto *DstElemTy = cast<IntegerType>(DstType->getElementType()); + unsigned ElemWidth = HVC.DL.getTypeAllocSize(DstElemTy); + LLVM_DEBUG(dbgs() << " Buffer type : " << *Ptr->getType() + << " Address space (" + << Ptr->getType()->getPointerAddressSpace() << ")\n" + << " Result type : " << *DstType + << "\n Size in bytes : " << OutputSize + << " element type(" << *DstElemTy + << ")\n ElemWidth : " << ElemWidth << " bytes\n"); + + auto *IndexType = cast<VectorType>(getIndexType(Indexes)); + assert(IndexType && "Cannot handle non vector index type for llvm.gather"); + unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType()); + LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n"); + + // Intrinsic takes i32 instead of pointer so cast. + Value *CastedPtr = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + // [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ...] + // int_hexagon_V6_vgathermh [... , llvm_v16i32_ty] + // int_hexagon_V6_vgathermh_128B [... , llvm_v32i32_ty] + // int_hexagon_V6_vgathermhw [... , llvm_v32i32_ty] + // int_hexagon_V6_vgathermhw_128B [... , llvm_v64i32_ty] + // int_hexagon_V6_vgathermw [... , llvm_v16i32_ty] + // int_hexagon_V6_vgathermw_128B [... , llvm_v32i32_ty] + if (HVC.HST.getVectorLength() == OutputSize) { + if (ElemWidth == 1) { + // v128i8 There is no native instruction for this. + // Do this as two Hi/Lo gathers with masking. + // Unpack indexes. We assume that indexes are in 128i8 format - need to + // expand them to Hi/Lo 64i16 + Value *CastIndexes = + Builder.CreateBitCast(Indexes, NT, "cast_to_32i32"); + auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub); + auto *UnpackedIndexes = + Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true), + V6_vunpack, CastIndexes, nullptr); + LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes + << ")\n"); + + auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi); + auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo); + [[maybe_unused]] Value *IndexHi = + HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes); + [[maybe_unused]] Value *IndexLo = + HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes); + LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n"); + LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n"); + // Create the mask for individual bytes + auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff); + LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n"); + // We use our destination allocation as a temp storage + // This is unlikely to work properly for masked gather. + auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq); + [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), V6_vgather, + {Ptr, QByteMask, CastedPtr, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexHi}, + nullptr); + LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n"); + // Rematerialize the result + [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi"); + LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n"); + // Same for the low part. Here we use Gather to return non-NULL result + // from this function and continue to iterate. We also are deleting Dst + // store below. 
+ Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), V6_vgather, + {Ptr, QByteMask, CastedPtr, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexLo}, + nullptr); + LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n"); + Value *LoadedResultLo = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo"); + LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n"); + // Now we have properly sized bytes in every other position + // B b A a c a A b B c f F g G h H is presented as + // B . b . A . a . c . a . A . b . B . c . f . F . g . G . h . H + // Use vpack to gather them + auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb); + [[maybe_unused]] auto Res = Builder.CreateIntrinsic( + NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr); + LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n"); + [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr); + LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n"); + } else if (ElemWidth == 2) { + // v32i16 + if (IndexWidth == 2) { + // Reinterpret 64i16 as 32i32. Only needed for syntactic IR match. + Value *CastIndex = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes); + LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n"); + // Shift all i16 left by 1 to match short addressing mode instead of + // byte. + auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh); + Value *AdjustedIndex = HVC.createHvxIntrinsic( + Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)}); + LLVM_DEBUG(dbgs() + << " Shifted half index: " << *AdjustedIndex << ")\n"); + + auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh); + // The 3rd argument is the size of the region to gather from. Probably + // want to set it to max VTCM size. + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), V6_vgather, + {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + AdjustedIndex}, + nullptr); + LLVM_DEBUG({ + for (auto &U : Dst->uses()) { + if (auto *UI = dyn_cast<Instruction>(U.getUser())) + dbgs() << " dst used by: " << *UI << "\n"; + } + for (auto &U : In.uses()) { + if (auto *UI = dyn_cast<Instruction>(U.getUser())) + dbgs() << " In used by : " << *UI << "\n"; + } + });
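+ // Note that, unlike llvm.masked.gather, the HVX gather writes its + // result to memory: the pseudo is expanded to a gather plus a store + // during packetization. The SSA value produced by the original + // intrinsic therefore has to be rematerialized from the destination + // buffer with an explicit load.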
+ // Create temp load from result in case the result is used by any + // other instruction. + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + In.replaceAllUsesWith(LoadedResult); + } else { + LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n"); + return nullptr; + } + } else if (ElemWidth == 4) { + if (IndexWidth == 4) { + // v32i32 + auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh); + Value *AdjustedIndex = HVC.createHvxIntrinsic( + Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)}); + LLVM_DEBUG(dbgs() + << " Shifted word index: " << *AdjustedIndex << ")\n"); + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B, + {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + AdjustedIndex}, + nullptr); + } else { + LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n"); + return nullptr; + } + } else { + LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n"); + return nullptr; + } + } else if (HVC.HST.getVectorLength() == OutputSize * 2) { + // This is half of the reg width, duplicate low in high + LLVM_DEBUG(dbgs() << " Unhandled half of register size\n"); + return nullptr; + } else if (HVC.HST.getVectorLength() * 2 == OutputSize) { + LLVM_DEBUG(dbgs() << " Unhandled twice the register size\n"); + return nullptr; + } + // Erase the original intrinsic and store that consumes it. + // HVX will create a pseudo for gather that is expanded to gather + store + // during packetization. + Dst->eraseFromParent(); + } else if (Qual == HvxIdioms::LLVM_Scatter) { + // Gather feeds directly into scatter. + LLVM_DEBUG({ + auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType()); + assert(DstInpTy && "Cannot handle no vector type for llvm.scatter"); + unsigned DstInpSize = HVC.getSizeOf(DstInpTy); + unsigned DstElements = HVC.length(DstInpTy); + auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType()); + assert(DstElemTy && "llvm.scatter needs vector of ptr argument"); + dbgs() << " Gather feeds into scatter\n Values to scatter : " + << *Dst->getOperand(0) << "\n"; + dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements + << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy + << ") Access alignment(" << *Dst->getOperand(2) << ")\n"; + }); + // Address of source + auto *Src = getPointer(IndexLoad); + if (!Src) + return nullptr; + LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n"); + + if (!isa<PointerType>(Src->getType())) { + LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n"); + return nullptr; + } + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n"); + + auto *DstLoad = locateAddressFromIntrinsic(Dst); + if (!DstLoad) { + LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n"); + return nullptr; + } + LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n"); + + Value *Ptr = getPointer(DstLoad); + if (!Ptr) + return nullptr; + LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n"); + Value *CastIndex = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, IndexLoad); + LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
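+ // V6_vgathermh addresses the gather region in bytes, while the GEP + // indexes count halfword elements, so the index vector has to be scaled + // before it is handed to the intrinsic.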
+ // Shift all i16 left by 1 to match short addressing mode instead of + // byte. + auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh); + Value *AdjustedIndex = HVC.createHvxIntrinsic( + Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)}); + LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n"); + + return Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + AdjustedIndex}, + nullptr); + } else if (Qual == HvxIdioms::HEX_Gather_Scatter) { + // Gather feeds into a previously inserted pseudo intrinsic. + // These cannot be in the same packet, so we need to generate another + // pseudo that is expanded to .tmp + store V6_vgathermh_pseudo + // V6_vgathermh_pseudo (ins IntRegs:$_dst_, s4_0Imm:$Ii, IntRegs:$Rt, + // ModRegs:$Mu, HvxVR:$Vv) + if (isa<AllocaInst>(IndexLoad)) { + auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes); + if (cstDataVector) { + // Our indexes are represented as a constant. We need them in a reg. + // This most likely will not work properly since alloca gives us a DDR + // stack location. This will be fixed once we teach the compiler about + // VTCM. + AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT); + [[maybe_unused]] auto *StoreIndexes = + Builder.CreateStore(cstDataVector, IndexesAlloca); + LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n"); + Value *LoadedIndex = Builder.CreateLoad( + IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index"); + AllocaInst *ResultAlloca = Builder.CreateAlloca(NT); + LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n"); + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {ResultAlloca, CastedSrc, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex}, + nullptr); + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n"); + In.replaceAllUsesWith(LoadedResult); + } + } else { + // Address of source + auto *Src = getPointer(IndexLoad); + if (!Src) + return nullptr; + LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n"); + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n"); + + auto *DstLoad = locateAddressFromIntrinsic(Dst); + if (!DstLoad) + return nullptr; + LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n"); + auto *Ptr = getPointer(DstLoad); + if (!Ptr) + return nullptr; + LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh, + {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + Indexes}, + nullptr); + } + return Gather; + } else if (Qual == HvxIdioms::HEX_Scatter) { + // This is the case when the result of a gather is used as an argument to + // Intrinsic::hexagon_V6_vscattermh_128B. Most likely we just inserted it + // ourselves. We have to create an alloca, store to it, and replace all + // uses with that.
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT); + Value *CastedSrc = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n"); + Value *CastIndex = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes); + LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {ResultAlloca, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + CastIndex}, + nullptr); + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + In.replaceAllUsesWith(LoadedResult); + } else if (Qual == HvxIdioms::HEX_Gather) { + // Gather feeds into another gather that was already replaced with + // hexagon_V6_vgathermh_128B. + if (isa<AllocaInst>(IndexLoad)) { + auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes); + if (cstDataVector) { + // Our indexes are represented as a constant. We need it in a reg. + AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT); + + [[maybe_unused]] auto *StoreIndexes = + Builder.CreateStore(cstDataVector, IndexesAlloca); + LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n"); + Value *LoadedIndex = Builder.CreateLoad( + IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index"); + AllocaInst *ResultAlloca = Builder.CreateAlloca(NT); + LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca + << "\n AddressSpace: " + << ResultAlloca->getAddressSpace() << "\n";); + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {ResultAlloca, CastedSrc, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex}, + nullptr); + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n"); + In.replaceAllUsesWith(LoadedResult); + } + } + } else if (Qual == HvxIdioms::LLVM_Gather) { + // Gather feeds into another gather + errs() << " Unimplemented vgather to vgather sequence\n"; + return nullptr; + } else + llvm_unreachable("Unhandled Qual enum"); + + return Gather; +} + auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In, const FxpOp &Op) const -> Value * { assert(Op.X.Val->getType() == Op.Y.Val->getType()); @@ -2138,6 +3000,26 @@ auto HvxIdioms::run() -> bool { It = StartOver ? B.rbegin() : cast<Instruction>(New)->getReverseIterator(); Changed = true; + } else if (matchGather(*It)) { + Value *New = processVGather(*It); + if (!New) + continue; + LLVM_DEBUG(dbgs() << " Gather : " << *New << "\n"); + // We replace the original intrinsic with a new pseudo call. + It->eraseFromParent(); + It = cast<Instruction>(New)->getReverseIterator(); + RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI); + Changed = true; + } else if (matchScatter(*It)) { + Value *New = processVScatter(*It); + if (!New) + continue; + LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n"); + // We replace the original intrinsic with a new pseudo call.
+ It->eraseFromParent(); + It = cast<Instruction>(New)->getReverseIterator(); + RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI); + Changed = true; } } } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 6455757..2f59b7c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -186,6 +186,9 @@ static unsigned featureToArchVersion(unsigned Feature) { case Hexagon::ArchV79: case Hexagon::ExtensionHVXV79: return 79; + case Hexagon::ArchV81: + case Hexagon::ExtensionHVXV81: + return 81; } llvm_unreachable("Expected valid arch feature"); return 0; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 6b48a21..b8075bd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -96,6 +96,8 @@ cl::opt<bool> MV75("mv75", cl::Hidden, cl::desc("Build for Hexagon V75"), cl::init(false)); cl::opt<bool> MV79("mv79", cl::Hidden, cl::desc("Build for Hexagon V79"), cl::init(false)); +cl::opt<bool> MV81("mv81", cl::Hidden, cl::desc("Build for Hexagon V81"), + cl::init(false)); } // namespace static cl::opt<Hexagon::ArchEnum> EnableHVX( @@ -111,6 +113,7 @@ static cl::opt<Hexagon::ArchEnum> EnableHVX( clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"), clEnumValN(Hexagon::ArchEnum::V75, "v75", "Build for HVX v75"), clEnumValN(Hexagon::ArchEnum::V79, "v79", "Build for HVX v79"), + clEnumValN(Hexagon::ArchEnum::V81, "v81", "Build for HVX v81"), // Sentinel for no value specified. clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. 
@@ -159,6 +162,8 @@ static StringRef HexagonGetArchVariant() { return "hexagonv75"; if (MV79) return "hexagonv79"; + if (MV81) + return "hexagonv81"; return ""; } @@ -474,6 +479,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { case Hexagon::ArchEnum::V79: Result.push_back("+hvxv79"); break; + case Hexagon::ArchEnum::V81: + Result.push_back("+hvxv81"); + break; case Hexagon::ArchEnum::Generic: { Result.push_back(StringSwitch<StringRef>(CPU) @@ -489,7 +497,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { .Case("hexagonv71t", "+hvxv71") .Case("hexagonv73", "+hvxv73") .Case("hexagonv75", "+hvxv75") - .Case("hexagonv79", "+hvxv79")); + .Case("hexagonv79", "+hvxv79") + .Case("hexagonv81", "+hvxv81")); break; } case Hexagon::ArchEnum::NoArch: @@ -538,8 +547,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { FeatureBitset FB = S; unsigned CpuArch = ArchV5; for (unsigned F : - {ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66, - ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { + {ArchV81, ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, + ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -556,7 +565,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, - ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79}) { + ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79, ExtensionHVXV81}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -569,6 +578,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // HasHvxVer is false, and UseHvx is true. 
switch (CpuArch) { + case ArchV81: + FB.set(ExtensionHVXV81); + [[fallthrough]]; case ArchV79: FB.set(ExtensionHVXV79); [[fallthrough]]; @@ -668,12 +680,12 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) { std::optional<unsigned> Hexagon_MC::getHVXVersion(const FeatureBitset &Features) { - for (auto Arch : {Hexagon::ExtensionHVXV79, Hexagon::ExtensionHVXV75, - Hexagon::ExtensionHVXV73, Hexagon::ExtensionHVXV71, - Hexagon::ExtensionHVXV69, Hexagon::ExtensionHVXV68, - Hexagon::ExtensionHVXV67, Hexagon::ExtensionHVXV66, - Hexagon::ExtensionHVXV65, Hexagon::ExtensionHVXV62, - Hexagon::ExtensionHVXV60}) + for (auto Arch : {Hexagon::ExtensionHVXV81, Hexagon::ExtensionHVXV79, + Hexagon::ExtensionHVXV75, Hexagon::ExtensionHVXV73, + Hexagon::ExtensionHVXV71, Hexagon::ExtensionHVXV69, + Hexagon::ExtensionHVXV68, Hexagon::ExtensionHVXV67, + Hexagon::ExtensionHVXV66, Hexagon::ExtensionHVXV65, + Hexagon::ExtensionHVXV62, Hexagon::ExtensionHVXV60}) if (Features.test(Arch)) return Arch; return {}; @@ -681,13 +693,13 @@ Hexagon_MC::getHVXVersion(const FeatureBitset &Features) { unsigned Hexagon_MC::getArchVersion(const FeatureBitset &Features) { for (auto Arch : - {Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, Hexagon::ArchV71, - Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, Hexagon::ArchV66, - Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, Hexagon::ArchV55, - Hexagon::ArchV5}) + {Hexagon::ArchV81, Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, + Hexagon::ArchV71, Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, + Hexagon::ArchV66, Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, + Hexagon::ArchV55, Hexagon::ArchV5}) if (Features.test(Arch)) return Arch; - llvm_unreachable("Expected arch v5-v79"); + llvm_unreachable("Expected arch v5-v81"); return 0; } @@ -708,7 +720,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { .Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T) .Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73) .Case("hexagonv75", llvm::ELF::EF_HEXAGON_MACH_V75) - .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79); + .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79) + .Case("hexagonv81", llvm::ELF::EF_HEXAGON_MACH_V81); } llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index aca7abd..44d1a44 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4578,6 +4578,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>; def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>; def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>; +def : InstAlias<"mtpidr $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsISA3_0]>; +def : InstAlias<"mfpidr $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsISA3_0]>; foreach SPRG = 4-7 in { def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>, diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 9e6b7f0..2754d78 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1124,7 +1124,8 @@ def HasStdExtZbkbOrP "'Base P' (Packed-SIMD)">; def HasStdExtZbbOrZbkbOrP - : Predicate<"Subtarget->HasStdExtZbbOrZbkb()|| Subtarget->hasStdExtP()">, + : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb() || " + "Subtarget->hasStdExtP()">, AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb, 
FeatureStdExtP), "'Zbb' (Basic Bit-Manipulation) or " "'Zbkb' (Bitmanip instructions for Cryptography) or " diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 219e3f2..1c930ac 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -318,8 +318,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); - if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() && - !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() && + if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() && + !Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() && + !Subtarget.hasVendorXAndesPerf() && !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); @@ -392,7 +393,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); } - if (Subtarget.hasStdExtZbb() || + if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() || (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) { setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, Legal); @@ -403,6 +404,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); } else { setOperationAction(ISD::CTTZ, XLenVT, Expand); + // If have a CLZW, but not CTZW, custom promote i32. + if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) + setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); } if (!Subtarget.hasCPOPLike()) { @@ -419,13 +423,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // We need the custom lowering to make sure that the resulting sequence // for the 32bit case is efficient on 64bit targets. // Use default promotion for i32 without Zbb. - if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb()) + if (Subtarget.is64Bit() && + (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP())) setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); } else { setOperationAction(ISD::CTLZ, XLenVT, Expand); } - if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) { + if (Subtarget.hasStdExtP() || + (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) { setOperationAction(ISD::ABS, XLenVT, Legal); } else if (Subtarget.hasShortForwardBranchOpt()) { // We can use PseudoCCSUB to implement ABS. @@ -14669,6 +14675,25 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); bool IsCTZ = N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; + + // Without Zbb, lower as 32 - clzw(~X & (X-1)) + if (IsCTZ && !Subtarget.hasStdExtZbb()) { + assert(Subtarget.hasStdExtP()); + + NewOp0 = DAG.getFreeze(NewOp0); + SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64); + SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0, + DAG.getConstant(1, DL, MVT::i64)); + SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1); + SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And); + SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64, + DAG.getConstant(32, DL, MVT::i64), CLZW); + SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub, + DAG.getValueType(MVT::i32)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + return; + } + unsigned Opc = IsCTZ ? 
RISCVISD::CTZW : RISCVISD::CLZW; SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 7d8a919..cc085bb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1455,3 +1455,11 @@ let Predicates = [HasStdExtP, IsRV32] in { def PMAXU_DW : RVPPairBinaryExchanged_rr<0b1111, 0b01, "pmaxu.dw">; def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">; } // Predicates = [HasStdExtP, IsRV32] + + +//===----------------------------------------------------------------------===// +// Codegen patterns +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtP] in +def : PatGpr<abs, ABS>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 4c2f7f6..f7b4914 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -218,11 +218,13 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0, } let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in { - def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">; + def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">, + SchedUnaryMC<"WriteSF_VFExp", "ReadSF_VFExp">; } let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in { - def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">; + def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">, + SchedUnaryMC<"WriteSF_VFExpa", "ReadSF_VFExpa">; } let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector", @@ -487,6 +489,48 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], AltFmtType = IS_NOT_ALTFMT in { defm SF_VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP; } +class VFExpSchedSEWSet<string mx, bit IsBF16, bit IsApprox> { + defvar BaseSet = SchedSEWSet<mx, isF=1>.val; + list<int> val = !if(IsBF16, !listremove(BaseSet, [32, 64]), + !if(IsApprox, BaseSet, !listremove(BaseSet, [64]))); +} +multiclass VPseudoVFExp_V<bit IsBF16 = false, bit IsApprox = false> { + defvar SchedSuffix = !if(IsApprox, "VFExpa", "VFExp"); + + foreach m = MxListF in { + defvar mx = m.MX; + foreach e = VFExpSchedSEWSet<mx, IsBF16, IsApprox>.val in { + let VLMul = m.value in { + def "_V_" # mx # "_E" # e + : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix, + mx, e, forcePassthruRead=true>; + def "_V_" # mx # "_E" # e # "_MASK" + : VPseudoUnaryMask<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix, + mx, e, forcePassthruRead=true>; + } + } + } +} + +let Predicates = [HasVendorXSfvfbfexp16e], hasSideEffects = 0 in { + let AltFmtType = IS_ALTFMT in { + defm PseudoSF_VFEXP_ALT : VPseudoVFExp_V<IsBF16=true>; + } +} + +let Predicates = [HasVendorXSfvfexpAnyFloat], hasSideEffects = 0 in { + let AltFmtType = IS_NOT_ALTFMT in { + defm PseudoSF_VFEXP : VPseudoVFExp_V; + } +} + +let Predicates = [HasVendorXSfvfexpa], AltFmtType = IS_NOT_ALTFMT in { + defm PseudoSF_VFEXPA : VPseudoVFExp_V<IsApprox=true>; +} + // SDNode def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisVT<1, XLenVT>, @@ -893,3 +937,36 @@ let Predicates = [HasVendorXSfcease] in { let rs2 = 0b00101; } } + +let Predicates = [HasVendorXSfvfbfexp16e] in { + defm : 
VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP_ALT", + AllBF16Vectors, + isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexp16e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP", + [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8], + isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexp32e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP", + [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexpa] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA", + [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexpa, HasVInstructionsF16] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA", + [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8], + isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexpa64e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA", + [VF64M1, VF64M2, VF64M4, VF64M8], isSEWAware=1>; +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 6b9a75f..5429c2a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -599,14 +599,20 @@ def : PatGpr<riscv_zip, ZIP_RV32, i32>; def : PatGpr<riscv_unzip, UNZIP_RV32, i32>; } // Predicates = [HasStdExtZbkb, IsRV32] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrP] in { def : PatGpr<ctlz, CLZ>; +} + +let Predicates = [HasStdExtZbb] in { def : PatGpr<cttz, CTZ>; def : PatGpr<ctpop, CPOP>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbb, IsRV64] in { +let Predicates = [HasStdExtZbbOrP, IsRV64] in { def : PatGpr<riscv_clzw, CLZW>; +} + +let Predicates = [HasStdExtZbb, IsRV64] in { def : PatGpr<riscv_ctzw, CTZW>; def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>; @@ -614,22 +620,22 @@ def : Pat<(i64 (riscv_negw_max GPR:$rs1)), (MAX GPR:$rs1, (XLenVT (SUBW (XLenVT X0), GPR:$rs1)))>; } // Predicates = [HasStdExtZbb, IsRV64] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrP] in { def : Pat<(XLenVT (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>; def : Pat<(XLenVT (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrP] in { def : PatGprGpr<smin, MIN>; def : PatGprGpr<smax, MAX>; def : PatGprGpr<umin, MINU>; def : PatGprGpr<umax, MAXU>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in +let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32] in def : PatGpr<bswap, REV8_RV32, i32>; -let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in +let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64] in def : PatGpr<bswap, REV8_RV64, i64>; let Predicates = [HasStdExtZbkb] in { diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 637d61fe..36a2f46 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -1588,6 +1588,10 @@ multiclass SiFive7SchedResources<int vlen, bit dualVALU, //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedQ; + // TODO: scheduling info of XSfvfexp* and XSfvfexpa* + // for SiFive7 will be added in follow-up patches. 
+ defm : UnsupportedSchedXSfvfexp; + defm : UnsupportedSchedXSfvfexpa; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 9ab9636..64ccfd8 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -523,6 +523,8 @@ include "RISCVScheduleZvk.td" // Vendor Extensions multiclass UnsupportedSchedXsf { defm : UnsupportedSchedXsfvcp; + defm : UnsupportedSchedXSfvfexp; + defm : UnsupportedSchedXSfvfexpa; defm : UnsupportedSchedXSfvfnrclipxfqf; defm : UnsupportedSchedXSfvfwmaccqqq; defm : UnsupportedSchedXSfvqmaccdod; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td index 99632e4..1ee6dc1 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td @@ -99,3 +99,23 @@ defm : LMULWriteRes<"WriteSF_VFWMACC_QQQ", []>; defm : LMULReadAdvance<"ReadSF_VFWMACC_QQQ", 0>; } // Unsupported = true } + +defm "" : LMULSEWSchedWritesF<"WriteSF_VFExp">; +defm "" : LMULSEWSchedReadsF<"ReadSF_VFExp">; + +multiclass UnsupportedSchedXSfvfexp { +let Unsupported = true in { +defm : LMULSEWWriteResF<"WriteSF_VFExp", []>; +defm : LMULSEWReadAdvanceF<"ReadSF_VFExp", 0>; +} // Unsupported = true +} + +defm "" : LMULSEWSchedWritesF<"WriteSF_VFExpa">; +defm "" : LMULSEWSchedReadsF<"ReadSF_VFExpa">; + +multiclass UnsupportedSchedXSfvfexpa { +let Unsupported = true in { +defm : LMULSEWWriteResF<"WriteSF_VFExpa", []>; +defm : LMULSEWReadAdvanceF<"ReadSF_VFExpa", 0>; +} // Unsupported = true +} diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 334db4b..4b4fc8f 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -187,7 +187,7 @@ public: } bool hasCLZLike() const { - return HasStdExtZbb || HasVendorXTHeadBb || + return HasStdExtZbb || HasStdExtP || HasVendorXTHeadBb || (HasVendorXCVbitmanip && !IsRV64); } bool hasCTZLike() const { @@ -197,7 +197,7 @@ public: return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64); } bool hasREV8Like() const { - return HasStdExtZbb || HasStdExtZbkb || HasVendorXTHeadBb; + return HasStdExtZbb || HasStdExtZbkb || HasStdExtP || HasVendorXTHeadBb; } bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; } diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 6261fad..706ab2b 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -160,6 +160,14 @@ FunctionPass *createX86PartialReductionPass(); /// // Analyzes and emits pseudos to support Win x64 Unwind V2. FunctionPass *createX86WinEHUnwindV2Pass(); +/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics +/// or split the data to two <128 x i32>. +FunctionPass *createX86LowerAMXTypePass(); + +/// The pass transforms amx intrinsics to scalar operation if the function has +/// optnone attribute or it is O0. 
+FunctionPass *createX86LowerAMXIntrinsicsPass(); + InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &, const X86RegisterBankInfo &); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 62073ec..4393f6e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { if (!(Subtarget->hasVLX() || NVT.is512BitVector())) return false; - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - auto getFoldableLogicOp = [](SDValue Op) { // Peek through single use bitcast. if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) @@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { return SDValue(); }; - SDValue A, FoldableOp; - if ((FoldableOp = getFoldableLogicOp(N1))) { - A = N0; - } else if ((FoldableOp = getFoldableLogicOp(N0))) { - A = N1; - } else - return false; + SDValue N0, N1, A, FoldableOp; + + // Identify and (optionally) peel an outer NOT that wraps a pure logic tree + auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) { + if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() && + ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) { + SDValue InnerOp = Op->getOperand(0); + + if (!getFoldableLogicOp(InnerOp)) + return SDValue(); + + N0 = InnerOp.getOperand(0); + N1 = InnerOp.getOperand(1); + if ((FoldableOp = getFoldableLogicOp(N1))) { + A = N0; + return InnerOp; + } + if ((FoldableOp = getFoldableLogicOp(N0))) { + A = N1; + return InnerOp; + } + } + return SDValue(); + }; + + bool PeeledOuterNot = false; + SDNode *OriN = N; + if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) { + PeeledOuterNot = true; + N = InnerOp.getNode(); + } else { + N0 = N->getOperand(0); + N1 = N->getOperand(1); + + if ((FoldableOp = getFoldableLogicOp(N1))) + A = N0; + else if ((FoldableOp = getFoldableLogicOp(N0))) + A = N1; + else + return false; + } SDValue B = FoldableOp.getOperand(0); SDValue C = FoldableOp.getOperand(1); @@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { case ISD::XOR: Imm ^= TernlogMagicA; break; } - return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); + if (PeeledOuterNot) + Imm = ~Imm; + + return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm); } /// If the high bits of an 'and' operand are known zero, try setting the diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 669d4f0..8d9933b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); return BinaryOperator::CreateSub(ConstCtlz, X); } + + // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false) + if (Op0->hasOneUse() && + match(Op0, + m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { + Type *Ty = II.getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty, + {X, IC.Builder.getFalse()}); + auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); + return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); + } } // cttz(Pow2) -> Log2(Pow2) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 
5aa8de3..f5130da 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -4697,5 +4697,28 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(), CondVal, FalseVal)); + unsigned BitWidth = SI.getType()->getScalarSizeInBits(); + CmpPredicate Pred; + Value *CmpLHS, *CmpRHS; + + // Canonicalize sign function ashr patterns: + // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0) + // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), + m_Value(TrueVal), m_Value(FalseVal))) && + ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) && + match(TrueVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) && + match(FalseVal, m_One())) || + (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) && + match(TrueVal, m_One()) && + match(FalseVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) { + + Function *Scmp = Intrinsic::getOrInsertDeclaration( + SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()}); + return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)}); + } + return nullptr; } diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b6cbecb..10b03bb 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4); static const Align kShadowTLSAlignment = Align(8); // These constants must be kept in sync with the ones in msan.h. +// TODO: increase size to match SVE/SVE2/SME/SME2 limits static const unsigned kParamTLSSize = 800; static const unsigned kRetvalTLSSize = 800; @@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } } + static bool isAArch64SVCount(Type *Ty) { + if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty)) + return TTy->getName() == "aarch64.svcount"; + return false; + } + + // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka + // 'target("aarch64.svcount")'), but not, e.g., <vscale x 4 x i32>. + static bool isScalableNonVectorType(Type *Ty) { + if (!isAArch64SVCount(Ty)) + LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty + << "\n"); + + return Ty->isScalableTy() && !isa<VectorType>(Ty); + } + void materializeChecks() { #ifndef NDEBUG // For assert below.
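Back in the X86ISelDAGToDAG.cpp hunk above, the new path peels an outer NOT off a two-level logic tree and accounts for it by inverting the final VPTERNLOG immediate. The immediate is a 3-input truth table built from per-operand magic bytes (0xF0/0xCC/0xAA, the TernlogMagic constants in the backend); a throwaway check of that algebra, with everything else invented for illustration:

#include <cassert>
#include <cstdint>

// Each operand's magic byte is its column in an 8-row truth table indexed by
// (A,B,C); combining magics with the same bitwise ops yields the VPTERNLOG
// immediate, and a peeled outer NOT simply inverts the immediate.
constexpr uint8_t MagicA = 0xF0, MagicB = 0xCC, MagicC = 0xAA;

bool ternlog(uint8_t Imm, bool A, bool B, bool C) {
  unsigned Row = (A << 2) | (B << 1) | unsigned(C);
  return (Imm >> Row) & 1;
}

int main() {
  // Immediate for ~(A & (B | C)): build the inner tree, then invert.
  uint8_t Imm = static_cast<uint8_t>(~(MagicA & (MagicB | MagicC)));
  for (int A = 0; A < 2; ++A)
    for (int B = 0; B < 2; ++B)
      for (int C = 0; C < 2; ++C)
        assert(ternlog(Imm, A, B, C) == !(A && (B || C)));
}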
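The InstCombineCalls.cpp fold above rewrites ctlz(~x & (x - 1)) to bitwidth - cttz(x, false): ~x & (x - 1) is exactly the mask of bits below the lowest set bit of x, so its leading-zero count is the bit width minus that bit's position, and the x == 0 case also holds since cttz(0, false) is the bit width. An exhaustive 8-bit check (illustrative only; clz8/ctz8 are hand-rolled stand-ins for the intrinsics):

#include <cassert>
#include <cstdint>

unsigned clz8(uint8_t V) {
  unsigned N = 0;
  for (int Bit = 7; Bit >= 0 && !((V >> Bit) & 1); --Bit)
    ++N;
  return N; // clz8(0) == 8
}

unsigned ctz8(uint8_t V) {
  unsigned N = 0;
  for (int Bit = 0; Bit < 8 && !((V >> Bit) & 1); ++Bit)
    ++N;
  return N; // ctz8(0) == 8
}

int main() {
  for (unsigned X = 0; X <= 0xFF; ++X) {
    // ~X & (X - 1) sets exactly the bits below the lowest set bit of X.
    uint8_t Mask = static_cast<uint8_t>(~X & (X - 1));
    assert(clz8(Mask) == 8 - ctz8(static_cast<uint8_t>(X)));
  }
}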
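Similarly, the InstCombineSelect.cpp hunk canonicalizes the ashr-based sign idiom to the llvm.scmp intrinsic: ashr X, bitwidth-1 is 0 for non-negative X and -1 for negative X, so selecting it against 1 on an X < 1 (or X > 0) test computes sign(X), i.e. scmp(X, 0). A quick exhaustive check at 8 bits (illustrative sketch; right shift of a negative value is implementation-defined before C++20 but arithmetic on mainstream targets):

#include <cassert>
#include <cstdint>

int scmpZero(int8_t X) { return (X > 0) - (X < 0); } // semantics of scmp(X, 0)

int foldedPattern(int8_t X) {
  // select (icmp slt X, 1), (ashr X, 7), 1 at bit width 8.
  return X < 1 ? (X >> 7) : 1;
}

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    assert(foldedPattern(X) == scmpZero(X));
  }
}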
@@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } + if (isScalableNonVectorType(OrigTy)) { + LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy + << "\n"); + return OrigTy; + } + uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { << *OrigIns << "\n"); return; } -#ifndef NDEBUG + Type *ShadowTy = Shadow->getType(); + if (isScalableNonVectorType(ShadowTy)) { + LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow + << " before " << *OrigIns << "\n"); + return; + } +#ifndef NDEBUG assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) || isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) && "Can only insert checks for integer, vector, and aggregate shadow " @@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // an extra "select". This results in much more compact IR. // Sa = select Sb, poisoned, (select b, Sc, Sd) Sa1 = getPoisonedShadow(getShadowTy(I.getType())); + } else if (isScalableNonVectorType(I.getType())) { + // This is intended to handle target("aarch64.svcount"), which can't be + // handled in the else branch because of incompatibility with CreateXor + // ("The supported LLVM operations on this type are limited to load, + // store, phi, select and alloca instructions"). + + // TODO: this currently underapproximates. Use Arm SVE EOR in the else + // branch as needed instead. + Sa1 = getCleanShadow(getShadowTy(I.getType())); } else { // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ] // If Sb (condition is poisoned), look for bits in c and d that are equal diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a1ad2db..2591df8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4172,11 +4172,6 @@ class VPlan { /// definitions are VPValues that hold a pointer to their underlying IR. SmallVector<VPValue *, 16> VPLiveIns; - /// Mapping from SCEVs to the VPValues representing their expansions. - /// NOTE: This mapping is temporary and will be removed once all users have - /// been modeled in VPlan directly. - DenseMap<const SCEV *, VPValue *> SCEVToExpansion; - /// Blocks allocated and owned by the VPlan. They will be deleted once the /// VPlan is destroyed. SmallVector<VPBlockBase *> CreatedBlocks; @@ -4424,15 +4419,6 @@ public: LLVM_DUMP_METHOD void dump() const; #endif - VPValue *getSCEVExpansion(const SCEV *S) const { - return SCEVToExpansion.lookup(S); - } - - void addSCEVExpansion(const SCEV *S, VPValue *V) { - assert(!SCEVToExpansion.contains(S) && "SCEV already expanded"); - SCEVToExpansion[S] = V; - } - /// Clone the current VPlan, update all VPValues of the new VPlan and cloned /// recipes to refer to the clones, and return it. 
VPlan *duplicate(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 06c3d75..fe66f13 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) { } VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { - if (auto *Expanded = Plan.getSCEVExpansion(Expr)) - return Expanded; VPValue *Expanded = nullptr; if (auto *E = dyn_cast<SCEVConstant>(Expr)) Expanded = Plan.getOrAddLiveIn(E->getValue()); @@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); } } - Plan.addSCEVExpansion(Expr, Expanded); return Expanded; }
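A note on the MemorySanitizer select handling above: the existing comment Sa = select Sb, poisoned, (select b, Sc, Sd) is the general shadow rule for select, and the new target("aarch64.svcount") branch deliberately underapproximates it with a clean shadow because that type supports too few IR operations to express the rule. A one-bit model of the general rule (illustrative sketch, not MSan code):

#include <cassert>

// One-bit model of the select shadow rule: a value carries a shadow bit S
// (true = possibly uninitialized).
struct Bit { bool V; bool S; };

Bit selectShadow(Bit B, Bit C, Bit D) {
  Bit R;
  R.V = B.V ? C.V : D.V;
  // A poisoned condition poisons the result; otherwise the chosen arm's
  // shadow propagates: Sa = select Sb, poisoned, (select b, Sc, Sd).
  R.S = B.S || (B.V ? C.S : D.S);
  return R;
}

int main() {
  assert(selectShadow({true, true}, {true, false}, {false, false}).S);
  assert(!selectShadow({true, false}, {false, false}, {true, true}).S);
}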
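Finally, on the VPlan hunks above: the deleted SCEVToExpansion map was plain memoization, so getOrCreateVPValueForSCEVExpr returned a single VPValue per SCEV; without it, each query may build a fresh VPExpandSCEVRecipe for non-trivial expressions. The removed pattern, reduced to a self-contained sketch with hypothetical types:

#include <unordered_map>

struct SCEVKey {};   // hypothetical stand-ins for SCEV / VPValue
struct Expansion {};

Expansion *expandFresh(const SCEVKey *) { return new Expansion(); }

Expansion *
getOrCreate(const SCEVKey *K,
            std::unordered_map<const SCEVKey *, Expansion *> &Cache) {
  auto [It, Inserted] = Cache.try_emplace(K, nullptr);
  if (Inserted)
    It->second = expandFresh(K); // first query builds the expansion
  return It->second;             // later queries reused it, pre-patch
}

int main() {
  std::unordered_map<const SCEVKey *, Expansion *> Cache;
  SCEVKey K;
  // Pre-patch behavior: repeated queries yield the same expansion object.
  return getOrCreate(&K, Cache) == getOrCreate(&K, Cache) ? 0 : 1;
}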
