Diffstat (limited to 'llvm/include/llvm')
38 files changed, 996 insertions, 534 deletions
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h index 88dbce9..a29aaa3 100644 --- a/llvm/include/llvm/ADT/Any.h +++ b/llvm/include/llvm/ADT/Any.h @@ -119,7 +119,6 @@ private: template <class T> friend T any_cast(Any &&Value); template <class T> friend const T *any_cast(const Any *Value); template <class T> friend T *any_cast(Any *Value); - template <typename T> friend bool any_isa(const Any &Value); std::unique_ptr<StorageBase> Storage; }; diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index 956dcbc..93c6bfb 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/HLSL/HLSLBinding.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/PassManager.h" @@ -633,86 +634,25 @@ LLVM_ABI ModulePass *createDXILResourceWrapperPassPass(); // register slots to resources with implicit bindings, and in a // post-optimization validation pass that will raise diagnostic about // overlapping bindings. -// -// For example for these resource bindings: -// -// RWBuffer<float> A[10] : register(u3); -// RWBuffer<float> B[] : register(u5, space2) -// -// The analysis result for UAV binding type will look like this: -// -// UAVSpaces { -// ResClass = ResourceClass::UAV, -// Spaces = { -// { Space = 0, FreeRanges = {{ 0, 2 }, { 13, UINT32_MAX }} }, -// { Space = 2, FreeRanges = {{ 0, 4 }} } -// } -// } -// class DXILResourceBindingInfo { -public: - struct BindingRange { - uint32_t LowerBound; - uint32_t UpperBound; - BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {} - }; - - struct RegisterSpace { - uint32_t Space; - SmallVector<BindingRange> FreeRanges; - RegisterSpace(uint32_t Space) : Space(Space) { - FreeRanges.emplace_back(0, UINT32_MAX); - } - // Size == -1 means unbounded array - LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size); - }; - - struct BindingSpaces { - dxil::ResourceClass RC; - llvm::SmallVector<RegisterSpace> Spaces; - BindingSpaces(dxil::ResourceClass RC) : RC(RC) {} - LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space); - }; - -private: - BindingSpaces SRVSpaces, UAVSpaces, CBufferSpaces, SamplerSpaces; - bool ImplicitBinding; - bool OverlappingBinding; + hlsl::BindingInfo Bindings; + bool HasImplicitBinding = false; + bool HasOverlappingBinding = false; // Populate the resource binding info given explicit resource binding calls // in the module. 
void populate(Module &M, DXILResourceTypeMap &DRTM); public: - DXILResourceBindingInfo() - : SRVSpaces(dxil::ResourceClass::SRV), - UAVSpaces(dxil::ResourceClass::UAV), - CBufferSpaces(dxil::ResourceClass::CBuffer), - SamplerSpaces(dxil::ResourceClass::Sampler), ImplicitBinding(false), - OverlappingBinding(false) {} - - bool hasImplicitBinding() const { return ImplicitBinding; } - void setHasImplicitBinding(bool Value) { ImplicitBinding = Value; } - bool hasOverlappingBinding() const { return OverlappingBinding; } - - BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) { - switch (RC) { - case dxil::ResourceClass::SRV: - return SRVSpaces; - case dxil::ResourceClass::UAV: - return UAVSpaces; - case dxil::ResourceClass::CBuffer: - return CBufferSpaces; - case dxil::ResourceClass::Sampler: - return SamplerSpaces; - } + bool hasImplicitBinding() const { return HasImplicitBinding; } + void setHasImplicitBinding(bool Value) { HasImplicitBinding = Value; } + bool hasOverlappingBinding() const { return HasOverlappingBinding; } + void setHasOverlappingBinding(bool Value) { HasOverlappingBinding = Value; } - llvm_unreachable("Invalid resource class"); - } - - // Size == -1 means unbounded array LLVM_ABI std::optional<uint32_t> - findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size); + findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size) { + return Bindings.findAvailableBinding(RC, Space, Size); + } friend class DXILResourceBindingAnalysis; friend class DXILResourceBindingWrapperPass; diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index af6e534..92304ed 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -180,10 +180,12 @@ public: const SmallVectorImpl<Instruction *> &Instrs) const; }; - MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, + MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, + DominatorTree *DT, const Loop *L, const DenseMap<Value *, const SCEV *> &SymbolicStrides, unsigned MaxTargetVectorWidthInBits) - : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), + : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), + SymbolicStrides(SymbolicStrides), MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} /// Register the location (instructions are given increasing numbers) @@ -288,6 +290,15 @@ public: return PointerBounds; } + DominatorTree *getDT() const { + assert(DT && "requested DT, but it is not available"); + return DT; + } + AssumptionCache *getAC() const { + assert(AC && "requested AC, but it is not available"); + return AC; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -296,6 +307,10 @@ private: /// example we might assume a unit stride for a pointer in order to prove /// that a memory access is strided and doesn't wrap. 
PredicatedScalarEvolution &PSE; + + AssumptionCache *AC; + DominatorTree *DT; + const Loop *InnermostLoop; /// Reference to map of pointer values to @@ -670,7 +685,7 @@ public: LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI, + DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial = false); /// Return true we can analyze the memory accesses in the loop and there are @@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap<std::pair<const SCEV *, Type *>, - std::pair<const SCEV *, const SCEV *>> *PointerBounds); + std::pair<const SCEV *, const SCEV *>> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC); class LoopAccessInfoManager { /// The cache. @@ -935,12 +951,13 @@ class LoopAccessInfoManager { LoopInfo &LI; TargetTransformInfo *TTI; const TargetLibraryInfo *TLI = nullptr; + AssumptionCache *AC; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, - const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} + const TargetLibraryInfo *TLI, AssumptionCache *AC) + : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {} LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h index bff7707..011d599 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h @@ -91,6 +91,10 @@ inline bind_ty<const SCEVUnknown> m_SCEVUnknown(const SCEVUnknown *&V) { return V; } +inline bind_ty<const SCEVAddExpr> m_scev_Add(const SCEVAddExpr *&V) { + return V; +} + /// Match a specified const SCEV *. struct specificscev_ty { const SCEV *Expr; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 7928835..aa4550d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1950,6 +1950,10 @@ public: const Function &F, SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const; + /// Returns true if GEP should not be used to index into vectors for this + /// target. + LLVM_ABI bool allowVectorElementIndexingUsingGEP() const; + private: std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl; }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2ea87b3..abdbca0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1145,6 +1145,8 @@ public: const Function &F, SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {} + virtual bool allowVectorElementIndexingUsingGEP() const { return true; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. 
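The LoopAccessAnalysis changes above thread an AssumptionCache and a DominatorTree through MemoryDepChecker, LoopAccessInfo, and LoopAccessInfoManager. As a hedged sketch of what call sites look like after this patch (the helper name and the new-pass-manager wiring are illustrative assumptions, not part of the diff), the manager is now constructed with the AssumptionCache as an extra trailing argument:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Sketch: construct a LoopAccessInfoManager under the new signature.
static LoopAccessInfoManager makeLAIManager(Function &F,
                                            FunctionAnalysisManager &FAM) {
  auto &SE  = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &AA  = FAM.getResult<AAManager>(F);
  auto &DT  = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &LI  = FAM.getResult<LoopAnalysis>(F);
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
  auto &AC  = FAM.getResult<AssumptionAnalysis>(F); // new trailing parameter
  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
}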
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b55c4e0..6781cd5 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -633,6 +633,9 @@ public: return true; } + /// Return true if this group is full, i.e. it has no gaps. + bool isFull() const { return getNumMembers() == getFactor(); } + private: uint32_t Factor; // Interleave Factor. bool Reverse; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index ad35d7f..749971e 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -973,7 +973,10 @@ enum : unsigned { // SM based processor values. EF_CUDA_SM100 = 0x6400, + EF_CUDA_SM101 = 0x6500, + EF_CUDA_SM103 = 0x6700, EF_CUDA_SM120 = 0x7800, + EF_CUDA_SM121 = 0x7900, // Set when using an accelerator variant like sm_100a. EF_CUDA_ACCELERATORS = 0x8, diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index 78f5eb4..5f53681 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -466,7 +466,7 @@ private: EmitCode(Abbrev); - unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); + unsigned i = 0, e = Abbv->getNumOperandInfos(); if (Code) { assert(e && "Expected non-empty abbreviation"); const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); @@ -632,8 +632,7 @@ private: void EncodeAbbrev(const BitCodeAbbrev &Abbv) { EmitCode(bitc::DEFINE_ABBREV); EmitVBR(Abbv.getNumOperandInfos(), 5); - for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos()); - i != e; ++i) { + for (unsigned i = 0, e = Abbv.getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i); Emit(Op.isLiteral(), 1); if (Op.isLiteral()) { diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index faab2503..91c0142 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -190,6 +190,36 @@ private: /// Emit comments in assembly output if this is true. bool VerboseAsm; + /// Store symbols and type identifiers used to create callgraph section + /// entries related to a function. + struct FunctionInfo { + /// Numeric type identifier used in callgraph section for indirect calls + /// and targets. + using CGTypeId = uint64_t; + + /// Enumeration of function kinds, and their mapping to function kind values + /// stored in callgraph section entries. + /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp. + enum class FunctionKind : uint64_t { + /// Function cannot be target to indirect calls. + NOT_INDIRECT_TARGET = 0, + + /// Function may be target to indirect calls but its type id is unknown. + INDIRECT_TARGET_UNKNOWN_TID = 1, + + /// Function may be target to indirect calls and its type id is known. + INDIRECT_TARGET_KNOWN_TID = 2, + }; + + /// Map type identifiers to callsite labels. Labels are generated for each + /// indirect callsite in the function. + SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels; + }; + + enum CallGraphSectionFormatVersion : uint64_t { + V_0 = 0, + }; + /// Output stream for the stack usage file (i.e., .su file). std::unique_ptr<raw_fd_ostream> StackUsageStream; @@ -355,6 +385,13 @@ public: /// are available. Returns empty string otherwise. 
StringRef getConstantSectionSuffix(const Constant *C) const; + /// Generate and emit labels for callees of the indirect callsites which will + /// be used to populate the .callgraph section. + void emitIndirectCalleeLabels( + FunctionInfo &FuncInfo, + const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, + const MachineInstr &MI); + //===------------------------------------------------------------------===// // XRay instrumentation implementation. //===------------------------------------------------------------------===// @@ -442,6 +479,8 @@ public: void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol); virtual void emitKCFITypeId(const MachineFunction &MF); + void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo); + void emitPseudoProbe(const MachineInstr &MI); void emitRemarksSection(remarks::RemarkStreamer &RS); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 938d71d..9e3d919 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -323,10 +323,11 @@ public: const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - /// Returns true if the original IR terminator is an `indirectbr`. This - /// typically corresponds to a `goto` in C, rather than jump tables. - bool terminatorIsComputedGoto() const { - return back().isIndirectBranch() && + /// Returns true if the original IR terminator is an `indirectbr` with + /// successor blocks. This typically corresponds to a `goto` in C, rather than + /// jump tables. + bool terminatorIsComputedGotoWithSuccessors() const { + return back().isIndirectBranch() && !succ_empty() && llvm::all_of(successors(), [](const MachineBasicBlock *Succ) { return Succ->isIRBlockAddressTaken(); }); diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 7f88323..06c4daf 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -517,6 +517,13 @@ public: SmallVector<ArgRegPair, 1> ArgRegPairs; /// Callee type ids. SmallVector<ConstantInt *, 4> CalleeTypeIds; + + CallSiteInfo() = default; + + /// Extracts the numeric type id from the CallBase's callee_type Metadata, + /// and sets CalleeTypeIds. This is used as type id for the indirect call in + /// the call graph section. + CallSiteInfo(const CallBase &CB); }; struct CalledGlobalInfo { @@ -524,11 +531,12 @@ public: unsigned TargetFlags; }; + using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>; + private: Delegate *TheDelegate = nullptr; GISelChangeObserver *Observer = nullptr; - using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>; /// Map a call instruction to call site arguments forwarding info. CallSiteInfoMap CallSitesInfo; diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index e63e77a..e705d7d9 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -69,6 +69,32 @@ enum { } // end namespace RegState +/// Set of metadata that should be preserved when using BuildMI(). This provides +/// a more convenient way of preserving DebugLoc, PCSections and MMRA. 
+class MIMetadata { +public: + MIMetadata() = default; + MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr) + : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {} + MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr, + MDNode *MMRA = nullptr) + : DL(DI), PCSections(PCSections), MMRA(MMRA) {} + explicit MIMetadata(const Instruction &From) + : DL(From.getDebugLoc()), + PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} + explicit MIMetadata(const MachineInstr &From) + : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} + + const DebugLoc &getDL() const { return DL; } + MDNode *getPCSections() const { return PCSections; } + MDNode *getMMRAMetadata() const { return MMRA; } + +private: + DebugLoc DL; + MDNode *PCSections = nullptr; + MDNode *MMRA = nullptr; +}; + class MachineInstrBuilder { MachineFunction *MF = nullptr; MachineInstr *MI = nullptr; @@ -317,15 +343,11 @@ public: } } - const MachineInstrBuilder &setPCSections(MDNode *MD) const { - if (MD) - MI->setPCSections(*MF, MD); - return *this; - } - - const MachineInstrBuilder &setMMRAMetadata(MDNode *MMRA) const { - if (MMRA) - MI->setMMRAMetadata(*MF, MMRA); + const MachineInstrBuilder ©MIMetadata(const MIMetadata &MIMD) const { + if (MIMD.getPCSections()) + MI->setPCSections(*MF, MIMD.getPCSections()); + if (MIMD.getMMRAMetadata()) + MI->setMMRAMetadata(*MF, MIMD.getMMRAMetadata()); return *this; } @@ -343,38 +365,11 @@ public: } }; -/// Set of metadata that should be preserved when using BuildMI(). This provides -/// a more convenient way of preserving DebugLoc, PCSections and MMRA. -class MIMetadata { -public: - MIMetadata() = default; - MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr) - : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {} - MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr, - MDNode *MMRA = nullptr) - : DL(DI), PCSections(PCSections), MMRA(MMRA) {} - explicit MIMetadata(const Instruction &From) - : DL(From.getDebugLoc()), - PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} - explicit MIMetadata(const MachineInstr &From) - : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} - - const DebugLoc &getDL() const { return DL; } - MDNode *getPCSections() const { return PCSections; } - MDNode *getMMRAMetadata() const { return MMRA; } - -private: - DebugLoc DL; - MDNode *PCSections = nullptr; - MDNode *MMRA = nullptr; -}; - /// Builder interface. Specify how to create the initial instruction itself. 
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + .copyMIMetadata(MIMD); } /// This version of the builder sets up the first operand as a @@ -382,8 +377,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) + .copyMIMetadata(MIMD) .addReg(DestReg, RegState::Define); } @@ -397,10 +391,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) - .addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg( + DestReg, RegState::Define); } /// This version of the builder inserts the newly-built instruction before @@ -416,10 +408,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) - .addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg( + DestReg, RegState::Define); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, @@ -449,9 +439,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -461,9 +449,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index efda7eb..5a2aee2 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1303,8 +1303,8 @@ protected: SchedBoundary Top; SchedBoundary Bot; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; /// Candidate last picked from Top boundary. SchedCandidate TopCand; @@ -1346,8 +1346,8 @@ protected: /// Candidate last picked from Bot boundary. 
SchedCandidate BotCand; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; public: PostGenericScheduler(const MachineSchedContext *C) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 2967532..be90250 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -578,6 +578,18 @@ m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) { return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx); } +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCC(const LTy &L, const RTy &R, const TTy &T, const FTy &F, + const CCTy &CC) { + return m_Node(ISD::SELECT_CC, L, R, T, F, CC); +} + +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, + const FTy &F, const CCTy &CC) { + return m_AnyOf(m_Select(m_SetCC(L, R, CC), T, F), m_SelectCC(L, R, T, F, CC)); +} + // === Binary operations === template <typename LHS_P, typename RHS_P, bool Commutable = false, bool ExcludeChain = false> diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index 3a0a31b..122b7be 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -240,6 +240,11 @@ class TargetRegisterInfo; typedef SmallSet<SUnit *, 8> ClusterInfo; constexpr unsigned InvalidClusterId = ~0u; + /// Return whether the input cluster ID's are the same and valid. + inline bool isTheSameCluster(unsigned A, unsigned B) { + return A != InvalidClusterId && A == B; + } + /// Scheduling unit. This is a node in the scheduling DAG. class SUnit { private: diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 8f88811..11ae8cd 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -182,7 +182,7 @@ public: return SDValue(Node, R); } - /// Return true if this node is an operand of N. + /// Return true if the referenced return value is an operand of N. LLVM_ABI bool isOperandOf(const SDNode *N) const; /// Return the ValueType of the referenced return value. 
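The new m_SelectCC and m_SelectCCLike matchers in SDPatternMatch.h let a combine handle the fused SELECT_CC node and the unfused SELECT-of-SETCC shape with a single pattern. A minimal sketch (the helper and the max-with-zero idiom are illustrative assumptions, not from this patch):

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// Sketch: recognize "x > 0 ? x : 0" whether the DAG holds it as
// SELECT(SETCC(x, 0, setgt), x, 0) or as a single SELECT_CC node.
static bool isSMaxWithZero(SDValue V) {
  SDValue X, T;
  return sd_match(V, m_SelectCCLike(m_Value(X), m_Zero(), m_Value(T),
                                    m_Zero(),
                                    m_SpecificCondCode(ISD::SETGT))) &&
         X == T;
}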
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 4551e7e..b06158d 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -92,258 +92,270 @@ def v1i1 : VTVec<1, i1, 17>; // 1 x i1 vector value def v2i1 : VTVec<2, i1, 18>; // 2 x i1 vector value def v3i1 : VTVec<3, i1, 19>; // 3 x i1 vector value def v4i1 : VTVec<4, i1, 20>; // 4 x i1 vector value -def v8i1 : VTVec<8, i1, 21>; // 8 x i1 vector value -def v16i1 : VTVec<16, i1, 22>; // 16 x i1 vector value -def v32i1 : VTVec<32, i1, 23>; // 32 x i1 vector value -def v64i1 : VTVec<64, i1, 24>; // 64 x i1 vector value -def v128i1 : VTVec<128, i1, 25>; // 128 x i1 vector value -def v256i1 : VTVec<256, i1, 26>; // 256 x i1 vector value -def v512i1 : VTVec<512, i1, 27>; // 512 x i1 vector value -def v1024i1 : VTVec<1024, i1, 28>; // 1024 x i1 vector value -def v2048i1 : VTVec<2048, i1, 29>; // 2048 x i1 vector value -def v4096i1 : VTVec<4096, i1, 30>; // 4096 x i1 vector value - -def v128i2 : VTVec<128, i2, 31>; // 128 x i2 vector value -def v256i2 : VTVec<256, i2, 32>; // 256 x i2 vector value - -def v64i4 : VTVec<64, i4, 33>; // 64 x i4 vector value -def v128i4 : VTVec<128, i4, 34>; // 128 x i4 vector value - -def v1i8 : VTVec<1, i8, 35>; // 1 x i8 vector value -def v2i8 : VTVec<2, i8, 36>; // 2 x i8 vector value -def v3i8 : VTVec<3, i8, 37>; // 3 x i8 vector value -def v4i8 : VTVec<4, i8, 38>; // 4 x i8 vector value -def v8i8 : VTVec<8, i8, 39>; // 8 x i8 vector value -def v16i8 : VTVec<16, i8, 40>; // 16 x i8 vector value -def v32i8 : VTVec<32, i8, 41>; // 32 x i8 vector value -def v64i8 : VTVec<64, i8, 42>; // 64 x i8 vector value -def v128i8 : VTVec<128, i8, 43>; // 128 x i8 vector value -def v256i8 : VTVec<256, i8, 44>; // 256 x i8 vector value -def v512i8 : VTVec<512, i8, 45>; // 512 x i8 vector value -def v1024i8 : VTVec<1024, i8, 46>; // 1024 x i8 vector value - -def v1i16 : VTVec<1, i16, 47>; // 1 x i16 vector value -def v2i16 : VTVec<2, i16, 48>; // 2 x i16 vector value -def v3i16 : VTVec<3, i16, 49>; // 3 x i16 vector value -def v4i16 : VTVec<4, i16, 50>; // 4 x i16 vector value -def v8i16 : VTVec<8, i16, 51>; // 8 x i16 vector value -def v16i16 : VTVec<16, i16, 52>; // 16 x i16 vector value -def v32i16 : VTVec<32, i16, 53>; // 32 x i16 vector value -def v64i16 : VTVec<64, i16, 54>; // 64 x i16 vector value -def v128i16 : VTVec<128, i16, 55>; // 128 x i16 vector value -def v256i16 : VTVec<256, i16, 56>; // 256 x i16 vector value -def v512i16 : VTVec<512, i16, 57>; // 512 x i16 vector value -def v4096i16 : VTVec<4096, i16, 58>; // 4096 x i16 vector value - -def v1i32 : VTVec<1, i32, 59>; // 1 x i32 vector value -def v2i32 : VTVec<2, i32, 60>; // 2 x i32 vector value -def v3i32 : VTVec<3, i32, 61>; // 3 x i32 vector value -def v4i32 : VTVec<4, i32, 62>; // 4 x i32 vector value -def v5i32 : VTVec<5, i32, 63>; // 5 x i32 vector value -def v6i32 : VTVec<6, i32, 64>; // 6 x f32 vector value -def v7i32 : VTVec<7, i32, 65>; // 7 x f32 vector value -def v8i32 : VTVec<8, i32, 66>; // 8 x i32 vector value -def v9i32 : VTVec<9, i32, 67>; // 9 x i32 vector value -def v10i32 : VTVec<10, i32, 68>; // 10 x i32 vector value -def v11i32 : VTVec<11, i32, 69>; // 11 x i32 vector value -def v12i32 : VTVec<12, i32, 70>; // 12 x i32 vector value -def v16i32 : VTVec<16, i32, 71>; // 16 x i32 vector value -def v32i32 : VTVec<32, i32, 72>; // 32 x i32 vector value -def v64i32 : VTVec<64, i32, 73>; // 64 x i32 vector value -def v128i32 : VTVec<128, i32, 74>; // 128 
x i32 vector value -def v256i32 : VTVec<256, i32, 75>; // 256 x i32 vector value -def v512i32 : VTVec<512, i32, 76>; // 512 x i32 vector value -def v1024i32 : VTVec<1024, i32, 77>; // 1024 x i32 vector value -def v2048i32 : VTVec<2048, i32, 78>; // 2048 x i32 vector value -def v4096i32 : VTVec<4096, i32, 79>; // 4096 x i32 vector value - -def v1i64 : VTVec<1, i64, 80>; // 1 x i64 vector value -def v2i64 : VTVec<2, i64, 81>; // 2 x i64 vector value -def v3i64 : VTVec<3, i64, 82>; // 3 x i64 vector value -def v4i64 : VTVec<4, i64, 83>; // 4 x i64 vector value -def v8i64 : VTVec<8, i64, 84>; // 8 x i64 vector value -def v16i64 : VTVec<16, i64, 85>; // 16 x i64 vector value -def v32i64 : VTVec<32, i64, 86>; // 32 x i64 vector value -def v64i64 : VTVec<64, i64, 87>; // 64 x i64 vector value -def v128i64 : VTVec<128, i64, 88>; // 128 x i64 vector value -def v256i64 : VTVec<256, i64, 89>; // 256 x i64 vector value - -def v1i128 : VTVec<1, i128, 90>; // 1 x i128 vector value - -def v1f16 : VTVec<1, f16, 91>; // 1 x f16 vector value -def v2f16 : VTVec<2, f16, 92>; // 2 x f16 vector value -def v3f16 : VTVec<3, f16, 93>; // 3 x f16 vector value -def v4f16 : VTVec<4, f16, 94>; // 4 x f16 vector value -def v8f16 : VTVec<8, f16, 95>; // 8 x f16 vector value -def v16f16 : VTVec<16, f16, 96>; // 16 x f16 vector value -def v32f16 : VTVec<32, f16, 97>; // 32 x f16 vector value -def v64f16 : VTVec<64, f16, 98>; // 64 x f16 vector value -def v128f16 : VTVec<128, f16, 99>; // 128 x f16 vector value -def v256f16 : VTVec<256, f16, 100>; // 256 x f16 vector value -def v512f16 : VTVec<512, f16, 101>; // 512 x f16 vector value -def v4096f16 : VTVec<4096, f16, 102>; // 4096 x f16 vector value - -def v1bf16 : VTVec<1, bf16, 103>; // 1 x bf16 vector value -def v2bf16 : VTVec<2, bf16, 104>; // 2 x bf16 vector value -def v3bf16 : VTVec<3, bf16, 105>; // 3 x bf16 vector value -def v4bf16 : VTVec<4, bf16, 106>; // 4 x bf16 vector value -def v8bf16 : VTVec<8, bf16, 107>; // 8 x bf16 vector value -def v16bf16 : VTVec<16, bf16, 108>; // 16 x bf16 vector value -def v32bf16 : VTVec<32, bf16, 109>; // 32 x bf16 vector value -def v64bf16 : VTVec<64, bf16, 110>; // 64 x bf16 vector value -def v128bf16 : VTVec<128, bf16, 111>; // 128 x bf16 vector value -def v4096bf16 : VTVec<4096, bf16, 112>; // 4096 x bf16 vector value - -def v1f32 : VTVec<1, f32, 113>; // 1 x f32 vector value -def v2f32 : VTVec<2, f32, 114>; // 2 x f32 vector value -def v3f32 : VTVec<3, f32, 115>; // 3 x f32 vector value -def v4f32 : VTVec<4, f32, 116>; // 4 x f32 vector value -def v5f32 : VTVec<5, f32, 117>; // 5 x f32 vector value -def v6f32 : VTVec<6, f32, 118>; // 6 x f32 vector value -def v7f32 : VTVec<7, f32, 119>; // 7 x f32 vector value -def v8f32 : VTVec<8, f32, 120>; // 8 x f32 vector value -def v9f32 : VTVec<9, f32, 121>; // 9 x f32 vector value -def v10f32 : VTVec<10, f32, 122>; // 10 x f32 vector value -def v11f32 : VTVec<11, f32, 123>; // 11 x f32 vector value -def v12f32 : VTVec<12, f32, 124>; // 12 x f32 vector value -def v16f32 : VTVec<16, f32, 125>; // 16 x f32 vector value -def v32f32 : VTVec<32, f32, 126>; // 32 x f32 vector value -def v64f32 : VTVec<64, f32, 127>; // 64 x f32 vector value -def v128f32 : VTVec<128, f32, 128>; // 128 x f32 vector value -def v256f32 : VTVec<256, f32, 129>; // 256 x f32 vector value -def v512f32 : VTVec<512, f32, 130>; // 512 x f32 vector value -def v1024f32 : VTVec<1024, f32, 131>; // 1024 x f32 vector value -def v2048f32 : VTVec<2048, f32, 132>; // 2048 x f32 vector value - -def v1f64 : VTVec<1, f64, 133>; // 
1 x f64 vector value -def v2f64 : VTVec<2, f64, 134>; // 2 x f64 vector value -def v3f64 : VTVec<3, f64, 135>; // 3 x f64 vector value -def v4f64 : VTVec<4, f64, 136>; // 4 x f64 vector value -def v8f64 : VTVec<8, f64, 137>; // 8 x f64 vector value -def v16f64 : VTVec<16, f64, 138>; // 16 x f64 vector value -def v32f64 : VTVec<32, f64, 139>; // 32 x f64 vector value -def v64f64 : VTVec<64, f64, 140>; // 64 x f64 vector value -def v128f64 : VTVec<128, f64, 141>; // 128 x f64 vector value -def v256f64 : VTVec<256, f64, 142>; // 256 x f64 vector value - -def nxv1i1 : VTScalableVec<1, i1, 143>; // n x 1 x i1 vector value -def nxv2i1 : VTScalableVec<2, i1, 144>; // n x 2 x i1 vector value -def nxv4i1 : VTScalableVec<4, i1, 145>; // n x 4 x i1 vector value -def nxv8i1 : VTScalableVec<8, i1, 146>; // n x 8 x i1 vector value -def nxv16i1 : VTScalableVec<16, i1, 147>; // n x 16 x i1 vector value -def nxv32i1 : VTScalableVec<32, i1, 148>; // n x 32 x i1 vector value -def nxv64i1 : VTScalableVec<64, i1, 149>; // n x 64 x i1 vector value - -def nxv1i8 : VTScalableVec<1, i8, 150>; // n x 1 x i8 vector value -def nxv2i8 : VTScalableVec<2, i8, 151>; // n x 2 x i8 vector value -def nxv4i8 : VTScalableVec<4, i8, 152>; // n x 4 x i8 vector value -def nxv8i8 : VTScalableVec<8, i8, 153>; // n x 8 x i8 vector value -def nxv16i8 : VTScalableVec<16, i8, 154>; // n x 16 x i8 vector value -def nxv32i8 : VTScalableVec<32, i8, 155>; // n x 32 x i8 vector value -def nxv64i8 : VTScalableVec<64, i8, 156>; // n x 64 x i8 vector value - -def nxv1i16 : VTScalableVec<1, i16, 157>; // n x 1 x i16 vector value -def nxv2i16 : VTScalableVec<2, i16, 158>; // n x 2 x i16 vector value -def nxv4i16 : VTScalableVec<4, i16, 159>; // n x 4 x i16 vector value -def nxv8i16 : VTScalableVec<8, i16, 160>; // n x 8 x i16 vector value -def nxv16i16 : VTScalableVec<16, i16, 161>; // n x 16 x i16 vector value -def nxv32i16 : VTScalableVec<32, i16, 162>; // n x 32 x i16 vector value - -def nxv1i32 : VTScalableVec<1, i32, 163>; // n x 1 x i32 vector value -def nxv2i32 : VTScalableVec<2, i32, 164>; // n x 2 x i32 vector value -def nxv4i32 : VTScalableVec<4, i32, 165>; // n x 4 x i32 vector value -def nxv8i32 : VTScalableVec<8, i32, 166>; // n x 8 x i32 vector value -def nxv16i32 : VTScalableVec<16, i32, 167>; // n x 16 x i32 vector value -def nxv32i32 : VTScalableVec<32, i32, 168>; // n x 32 x i32 vector value - -def nxv1i64 : VTScalableVec<1, i64, 169>; // n x 1 x i64 vector value -def nxv2i64 : VTScalableVec<2, i64, 170>; // n x 2 x i64 vector value -def nxv4i64 : VTScalableVec<4, i64, 171>; // n x 4 x i64 vector value -def nxv8i64 : VTScalableVec<8, i64, 172>; // n x 8 x i64 vector value -def nxv16i64 : VTScalableVec<16, i64, 173>; // n x 16 x i64 vector value -def nxv32i64 : VTScalableVec<32, i64, 174>; // n x 32 x i64 vector value - -def nxv1f16 : VTScalableVec<1, f16, 175>; // n x 1 x f16 vector value -def nxv2f16 : VTScalableVec<2, f16, 176>; // n x 2 x f16 vector value -def nxv4f16 : VTScalableVec<4, f16, 177>; // n x 4 x f16 vector value -def nxv8f16 : VTScalableVec<8, f16, 178>; // n x 8 x f16 vector value -def nxv16f16 : VTScalableVec<16, f16, 179>; // n x 16 x f16 vector value -def nxv32f16 : VTScalableVec<32, f16, 180>; // n x 32 x f16 vector value - -def nxv1bf16 : VTScalableVec<1, bf16, 181>; // n x 1 x bf16 vector value -def nxv2bf16 : VTScalableVec<2, bf16, 182>; // n x 2 x bf16 vector value -def nxv4bf16 : VTScalableVec<4, bf16, 183>; // n x 4 x bf16 vector value -def nxv8bf16 : VTScalableVec<8, bf16, 184>; // n x 8 x bf16 
vector value -def nxv16bf16 : VTScalableVec<16, bf16, 185>; // n x 16 x bf16 vector value -def nxv32bf16 : VTScalableVec<32, bf16, 186>; // n x 32 x bf16 vector value - -def nxv1f32 : VTScalableVec<1, f32, 187>; // n x 1 x f32 vector value -def nxv2f32 : VTScalableVec<2, f32, 188>; // n x 2 x f32 vector value -def nxv4f32 : VTScalableVec<4, f32, 189>; // n x 4 x f32 vector value -def nxv8f32 : VTScalableVec<8, f32, 190>; // n x 8 x f32 vector value -def nxv16f32 : VTScalableVec<16, f32, 191>; // n x 16 x f32 vector value - -def nxv1f64 : VTScalableVec<1, f64, 192>; // n x 1 x f64 vector value -def nxv2f64 : VTScalableVec<2, f64, 193>; // n x 2 x f64 vector value -def nxv4f64 : VTScalableVec<4, f64, 194>; // n x 4 x f64 vector value -def nxv8f64 : VTScalableVec<8, f64, 195>; // n x 8 x f64 vector value +def v5i1 : VTVec<5, i1, 21>; // 5 x i1 vector value +def v6i1 : VTVec<6, i1, 22>; // 6 x i1 vector value +def v7i1 : VTVec<7, i1, 23>; // 7 x i1 vector value +def v8i1 : VTVec<8, i1, 24>; // 8 x i1 vector value +def v16i1 : VTVec<16, i1, 25>; // 16 x i1 vector value +def v32i1 : VTVec<32, i1, 26>; // 32 x i1 vector value +def v64i1 : VTVec<64, i1, 27>; // 64 x i1 vector value +def v128i1 : VTVec<128, i1, 28>; // 128 x i1 vector value +def v256i1 : VTVec<256, i1, 29>; // 256 x i1 vector value +def v512i1 : VTVec<512, i1, 30>; // 512 x i1 vector value +def v1024i1 : VTVec<1024, i1, 31>; // 1024 x i1 vector value +def v2048i1 : VTVec<2048, i1, 32>; // 2048 x i1 vector value +def v4096i1 : VTVec<4096, i1, 33>; // 4096 x i1 vector value + +def v128i2 : VTVec<128, i2, 34>; // 128 x i2 vector value +def v256i2 : VTVec<256, i2, 35>; // 256 x i2 vector value + +def v64i4 : VTVec<64, i4, 36>; // 64 x i4 vector value +def v128i4 : VTVec<128, i4, 37>; // 128 x i4 vector value + +def v1i8 : VTVec<1, i8, 38>; // 1 x i8 vector value +def v2i8 : VTVec<2, i8, 39>; // 2 x i8 vector value +def v3i8 : VTVec<3, i8, 40>; // 3 x i8 vector value +def v4i8 : VTVec<4, i8, 41>; // 4 x i8 vector value +def v5i8 : VTVec<5, i8, 42>; // 5 x i8 vector value +def v6i8 : VTVec<6, i8, 43>; // 6 x i8 vector value +def v7i8 : VTVec<7, i8, 44>; // 7 x i8 vector value +def v8i8 : VTVec<8, i8, 45>; // 8 x i8 vector value +def v16i8 : VTVec<16, i8, 46>; // 16 x i8 vector value +def v32i8 : VTVec<32, i8, 47>; // 32 x i8 vector value +def v64i8 : VTVec<64, i8, 48>; // 64 x i8 vector value +def v128i8 : VTVec<128, i8, 49>; // 128 x i8 vector value +def v256i8 : VTVec<256, i8, 50>; // 256 x i8 vector value +def v512i8 : VTVec<512, i8, 51>; // 512 x i8 vector value +def v1024i8 : VTVec<1024, i8, 52>; // 1024 x i8 vector value + +def v1i16 : VTVec<1, i16, 53>; // 1 x i16 vector value +def v2i16 : VTVec<2, i16, 54>; // 2 x i16 vector value +def v3i16 : VTVec<3, i16, 55>; // 3 x i16 vector value +def v4i16 : VTVec<4, i16, 56>; // 4 x i16 vector value +def v5i16 : VTVec<5, i16, 57>; // 5 x i16 vector value +def v6i16 : VTVec<6, i16, 58>; // 6 x i16 vector value +def v7i16 : VTVec<7, i16, 59>; // 7 x i16 vector value +def v8i16 : VTVec<8, i16, 60>; // 8 x i16 vector value +def v16i16 : VTVec<16, i16, 61>; // 16 x i16 vector value +def v32i16 : VTVec<32, i16, 62>; // 32 x i16 vector value +def v64i16 : VTVec<64, i16, 63>; // 64 x i16 vector value +def v128i16 : VTVec<128, i16, 64>; // 128 x i16 vector value +def v256i16 : VTVec<256, i16, 65>; // 256 x i16 vector value +def v512i16 : VTVec<512, i16, 66>; // 512 x i16 vector value +def v4096i16 : VTVec<4096, i16, 67>; // 4096 x i16 vector value + +def v1i32 : VTVec<1, i32, 68>; // 1 x i32 
vector value +def v2i32 : VTVec<2, i32, 69>; // 2 x i32 vector value +def v3i32 : VTVec<3, i32, 70>; // 3 x i32 vector value +def v4i32 : VTVec<4, i32, 71>; // 4 x i32 vector value +def v5i32 : VTVec<5, i32, 72>; // 5 x i32 vector value +def v6i32 : VTVec<6, i32, 73>; // 6 x i32 vector value +def v7i32 : VTVec<7, i32, 74>; // 7 x i32 vector value +def v8i32 : VTVec<8, i32, 75>; // 8 x i32 vector value +def v9i32 : VTVec<9, i32, 76>; // 9 x i32 vector value +def v10i32 : VTVec<10, i32, 77>; // 10 x i32 vector value +def v11i32 : VTVec<11, i32, 78>; // 11 x i32 vector value +def v12i32 : VTVec<12, i32, 79>; // 12 x i32 vector value +def v16i32 : VTVec<16, i32, 80>; // 16 x i32 vector value +def v32i32 : VTVec<32, i32, 81>; // 32 x i32 vector value +def v64i32 : VTVec<64, i32, 82>; // 64 x i32 vector value +def v128i32 : VTVec<128, i32, 83>; // 128 x i32 vector value +def v256i32 : VTVec<256, i32, 84>; // 256 x i32 vector value +def v512i32 : VTVec<512, i32, 85>; // 512 x i32 vector value +def v1024i32 : VTVec<1024, i32, 86>; // 1024 x i32 vector value +def v2048i32 : VTVec<2048, i32, 87>; // 2048 x i32 vector value +def v4096i32 : VTVec<4096, i32, 88>; // 4096 x i32 vector value + +def v1i64 : VTVec<1, i64, 89>; // 1 x i64 vector value +def v2i64 : VTVec<2, i64, 90>; // 2 x i64 vector value +def v3i64 : VTVec<3, i64, 91>; // 3 x i64 vector value +def v4i64 : VTVec<4, i64, 92>; // 4 x i64 vector value +def v8i64 : VTVec<8, i64, 93>; // 8 x i64 vector value +def v16i64 : VTVec<16, i64, 94>; // 16 x i64 vector value +def v32i64 : VTVec<32, i64, 95>; // 32 x i64 vector value +def v64i64 : VTVec<64, i64, 96>; // 64 x i64 vector value +def v128i64 : VTVec<128, i64, 97>; // 128 x i64 vector value +def v256i64 : VTVec<256, i64, 98>; // 256 x i64 vector value + +def v1i128 : VTVec<1, i128, 99>; // 1 x i128 vector value + +def v1f16 : VTVec<1, f16, 100>; // 1 x f16 vector value +def v2f16 : VTVec<2, f16, 101>; // 2 x f16 vector value +def v3f16 : VTVec<3, f16, 102>; // 3 x f16 vector value +def v4f16 : VTVec<4, f16, 103>; // 4 x f16 vector value +def v5f16 : VTVec<5, f16, 104>; // 5 x f16 vector value +def v6f16 : VTVec<6, f16, 105>; // 6 x f16 vector value +def v7f16 : VTVec<7, f16, 106>; // 7 x f16 vector value +def v8f16 : VTVec<8, f16, 107>; // 8 x f16 vector value +def v16f16 : VTVec<16, f16, 108>; // 16 x f16 vector value +def v32f16 : VTVec<32, f16, 109>; // 32 x f16 vector value +def v64f16 : VTVec<64, f16, 110>; // 64 x f16 vector value +def v128f16 : VTVec<128, f16, 111>; // 128 x f16 vector value +def v256f16 : VTVec<256, f16, 112>; // 256 x f16 vector value +def v512f16 : VTVec<512, f16, 113>; // 512 x f16 vector value +def v4096f16 : VTVec<4096, f16, 114>; // 4096 x f16 vector value + +def v1bf16 : VTVec<1, bf16, 115>; // 1 x bf16 vector value +def v2bf16 : VTVec<2, bf16, 116>; // 2 x bf16 vector value +def v3bf16 : VTVec<3, bf16, 117>; // 3 x bf16 vector value +def v4bf16 : VTVec<4, bf16, 118>; // 4 x bf16 vector value +def v8bf16 : VTVec<8, bf16, 119>; // 8 x bf16 vector value +def v16bf16 : VTVec<16, bf16, 120>; // 16 x bf16 vector value +def v32bf16 : VTVec<32, bf16, 121>; // 32 x bf16 vector value +def v64bf16 : VTVec<64, bf16, 122>; // 64 x bf16 vector value +def v128bf16 : VTVec<128, bf16, 123>; // 128 x bf16 vector value +def v4096bf16 : VTVec<4096, bf16, 124>; // 4096 x bf16 vector value + +def v1f32 : VTVec<1, f32, 125>; // 1 x f32 vector value +def v2f32 : VTVec<2, f32, 126>; // 2 x f32 vector value +def v3f32 : VTVec<3, f32, 127>; // 3 x f32 vector value +def v4f32 : VTVec<4, 
f32, 128>; // 4 x f32 vector value +def v5f32 : VTVec<5, f32, 129>; // 5 x f32 vector value +def v6f32 : VTVec<6, f32, 130>; // 6 x f32 vector value +def v7f32 : VTVec<7, f32, 131>; // 7 x f32 vector value +def v8f32 : VTVec<8, f32, 132>; // 8 x f32 vector value +def v9f32 : VTVec<9, f32, 133>; // 9 x f32 vector value +def v10f32 : VTVec<10, f32, 134>; // 10 x f32 vector value +def v11f32 : VTVec<11, f32, 135>; // 11 x f32 vector value +def v12f32 : VTVec<12, f32, 136>; // 12 x f32 vector value +def v16f32 : VTVec<16, f32, 137>; // 16 x f32 vector value +def v32f32 : VTVec<32, f32, 138>; // 32 x f32 vector value +def v64f32 : VTVec<64, f32, 139>; // 64 x f32 vector value +def v128f32 : VTVec<128, f32, 140>; // 128 x f32 vector value +def v256f32 : VTVec<256, f32, 141>; // 256 x f32 vector value +def v512f32 : VTVec<512, f32, 142>; // 512 x f32 vector value +def v1024f32 : VTVec<1024, f32, 143>; // 1024 x f32 vector value +def v2048f32 : VTVec<2048, f32, 144>; // 2048 x f32 vector value + +def v1f64 : VTVec<1, f64, 145>; // 1 x f64 vector value +def v2f64 : VTVec<2, f64, 146>; // 2 x f64 vector value +def v3f64 : VTVec<3, f64, 147>; // 3 x f64 vector value +def v4f64 : VTVec<4, f64, 148>; // 4 x f64 vector value +def v8f64 : VTVec<8, f64, 149>; // 8 x f64 vector value +def v16f64 : VTVec<16, f64, 150>; // 16 x f64 vector value +def v32f64 : VTVec<32, f64, 151>; // 32 x f64 vector value +def v64f64 : VTVec<64, f64, 152>; // 64 x f64 vector value +def v128f64 : VTVec<128, f64, 153>; // 128 x f64 vector value +def v256f64 : VTVec<256, f64, 154>; // 256 x f64 vector value + +def nxv1i1 : VTScalableVec<1, i1, 155>; // n x 1 x i1 vector value +def nxv2i1 : VTScalableVec<2, i1, 156>; // n x 2 x i1 vector value +def nxv4i1 : VTScalableVec<4, i1, 157>; // n x 4 x i1 vector value +def nxv8i1 : VTScalableVec<8, i1, 158>; // n x 8 x i1 vector value +def nxv16i1 : VTScalableVec<16, i1, 159>; // n x 16 x i1 vector value +def nxv32i1 : VTScalableVec<32, i1, 160>; // n x 32 x i1 vector value +def nxv64i1 : VTScalableVec<64, i1, 161>; // n x 64 x i1 vector value + +def nxv1i8 : VTScalableVec<1, i8, 162>; // n x 1 x i8 vector value +def nxv2i8 : VTScalableVec<2, i8, 163>; // n x 2 x i8 vector value +def nxv4i8 : VTScalableVec<4, i8, 164>; // n x 4 x i8 vector value +def nxv8i8 : VTScalableVec<8, i8, 165>; // n x 8 x i8 vector value +def nxv16i8 : VTScalableVec<16, i8, 166>; // n x 16 x i8 vector value +def nxv32i8 : VTScalableVec<32, i8, 167>; // n x 32 x i8 vector value +def nxv64i8 : VTScalableVec<64, i8, 168>; // n x 64 x i8 vector value + +def nxv1i16 : VTScalableVec<1, i16, 169>; // n x 1 x i16 vector value +def nxv2i16 : VTScalableVec<2, i16, 170>; // n x 2 x i16 vector value +def nxv4i16 : VTScalableVec<4, i16, 171>; // n x 4 x i16 vector value +def nxv8i16 : VTScalableVec<8, i16, 172>; // n x 8 x i16 vector value +def nxv16i16 : VTScalableVec<16, i16, 173>; // n x 16 x i16 vector value +def nxv32i16 : VTScalableVec<32, i16, 174>; // n x 32 x i16 vector value + +def nxv1i32 : VTScalableVec<1, i32, 175>; // n x 1 x i32 vector value +def nxv2i32 : VTScalableVec<2, i32, 176>; // n x 2 x i32 vector value +def nxv4i32 : VTScalableVec<4, i32, 177>; // n x 4 x i32 vector value +def nxv8i32 : VTScalableVec<8, i32, 178>; // n x 8 x i32 vector value +def nxv16i32 : VTScalableVec<16, i32, 179>; // n x 16 x i32 vector value +def nxv32i32 : VTScalableVec<32, i32, 180>; // n x 32 x i32 vector value + +def nxv1i64 : VTScalableVec<1, i64, 181>; // n x 1 x i64 vector value +def nxv2i64 : VTScalableVec<2, i64, 182>; // 
n x 2 x i64 vector value +def nxv4i64 : VTScalableVec<4, i64, 183>; // n x 4 x i64 vector value +def nxv8i64 : VTScalableVec<8, i64, 184>; // n x 8 x i64 vector value +def nxv16i64 : VTScalableVec<16, i64, 185>; // n x 16 x i64 vector value +def nxv32i64 : VTScalableVec<32, i64, 186>; // n x 32 x i64 vector value + +def nxv1f16 : VTScalableVec<1, f16, 187>; // n x 1 x f16 vector value +def nxv2f16 : VTScalableVec<2, f16, 188>; // n x 2 x f16 vector value +def nxv4f16 : VTScalableVec<4, f16, 189>; // n x 4 x f16 vector value +def nxv8f16 : VTScalableVec<8, f16, 190>; // n x 8 x f16 vector value +def nxv16f16 : VTScalableVec<16, f16, 191>; // n x 16 x f16 vector value +def nxv32f16 : VTScalableVec<32, f16, 192>; // n x 32 x f16 vector value + +def nxv1bf16 : VTScalableVec<1, bf16, 193>; // n x 1 x bf16 vector value +def nxv2bf16 : VTScalableVec<2, bf16, 194>; // n x 2 x bf16 vector value +def nxv4bf16 : VTScalableVec<4, bf16, 195>; // n x 4 x bf16 vector value +def nxv8bf16 : VTScalableVec<8, bf16, 196>; // n x 8 x bf16 vector value +def nxv16bf16 : VTScalableVec<16, bf16, 197>; // n x 16 x bf16 vector value +def nxv32bf16 : VTScalableVec<32, bf16, 198>; // n x 32 x bf16 vector value + +def nxv1f32 : VTScalableVec<1, f32, 199>; // n x 1 x f32 vector value +def nxv2f32 : VTScalableVec<2, f32, 200>; // n x 2 x f32 vector value +def nxv4f32 : VTScalableVec<4, f32, 201>; // n x 4 x f32 vector value +def nxv8f32 : VTScalableVec<8, f32, 202>; // n x 8 x f32 vector value +def nxv16f32 : VTScalableVec<16, f32, 203>; // n x 16 x f32 vector value + +def nxv1f64 : VTScalableVec<1, f64, 204>; // n x 1 x f64 vector value +def nxv2f64 : VTScalableVec<2, f64, 205>; // n x 2 x f64 vector value +def nxv4f64 : VTScalableVec<4, f64, 206>; // n x 4 x f64 vector value +def nxv8f64 : VTScalableVec<8, f64, 207>; // n x 8 x f64 vector value // Sz = NF * MinNumElts * 8(bits) -def riscv_nxv1i8x2 : VTVecTup<16, 2, i8, 196>; // RISCV vector tuple(min_num_elts=1, nf=2) -def riscv_nxv1i8x3 : VTVecTup<24, 3, i8, 197>; // RISCV vector tuple(min_num_elts=1, nf=3) -def riscv_nxv1i8x4 : VTVecTup<32, 4, i8, 198>; // RISCV vector tuple(min_num_elts=1, nf=4) -def riscv_nxv1i8x5 : VTVecTup<40, 5, i8, 199>; // RISCV vector tuple(min_num_elts=1, nf=5) -def riscv_nxv1i8x6 : VTVecTup<48, 6, i8, 200>; // RISCV vector tuple(min_num_elts=1, nf=6) -def riscv_nxv1i8x7 : VTVecTup<56, 7, i8, 201>; // RISCV vector tuple(min_num_elts=1, nf=7) -def riscv_nxv1i8x8 : VTVecTup<64, 8, i8, 202>; // RISCV vector tuple(min_num_elts=1, nf=8) -def riscv_nxv2i8x2 : VTVecTup<32, 2, i8, 203>; // RISCV vector tuple(min_num_elts=2, nf=2) -def riscv_nxv2i8x3 : VTVecTup<48, 3, i8, 204>; // RISCV vector tuple(min_num_elts=2, nf=3) -def riscv_nxv2i8x4 : VTVecTup<64, 4, i8, 205>; // RISCV vector tuple(min_num_elts=2, nf=4) -def riscv_nxv2i8x5 : VTVecTup<80, 5, i8, 206>; // RISCV vector tuple(min_num_elts=2, nf=5) -def riscv_nxv2i8x6 : VTVecTup<96, 6, i8, 207>; // RISCV vector tuple(min_num_elts=2, nf=6) -def riscv_nxv2i8x7 : VTVecTup<112, 7, i8, 208>; // RISCV vector tuple(min_num_elts=2, nf=7) -def riscv_nxv2i8x8 : VTVecTup<128, 8, i8, 209>; // RISCV vector tuple(min_num_elts=2, nf=8) -def riscv_nxv4i8x2 : VTVecTup<64, 2, i8, 210>; // RISCV vector tuple(min_num_elts=4, nf=2) -def riscv_nxv4i8x3 : VTVecTup<96, 3, i8, 211>; // RISCV vector tuple(min_num_elts=4, nf=3) -def riscv_nxv4i8x4 : VTVecTup<128, 4, i8, 212>; // RISCV vector tuple(min_num_elts=4, nf=4) -def riscv_nxv4i8x5 : VTVecTup<160, 5, i8, 213>; // RISCV vector tuple(min_num_elts=4, nf=5) -def 
riscv_nxv4i8x6 : VTVecTup<192, 6, i8, 214>; // RISCV vector tuple(min_num_elts=4, nf=6) -def riscv_nxv4i8x7 : VTVecTup<224, 7, i8, 215>; // RISCV vector tuple(min_num_elts=4, nf=7) -def riscv_nxv4i8x8 : VTVecTup<256, 8, i8, 216>; // RISCV vector tuple(min_num_elts=4, nf=8) -def riscv_nxv8i8x2 : VTVecTup<128, 2, i8, 217>; // RISCV vector tuple(min_num_elts=8, nf=2) -def riscv_nxv8i8x3 : VTVecTup<192, 3, i8, 218>; // RISCV vector tuple(min_num_elts=8, nf=3) -def riscv_nxv8i8x4 : VTVecTup<256, 4, i8, 219>; // RISCV vector tuple(min_num_elts=8, nf=4) -def riscv_nxv8i8x5 : VTVecTup<320, 5, i8, 220>; // RISCV vector tuple(min_num_elts=8, nf=5) -def riscv_nxv8i8x6 : VTVecTup<384, 6, i8, 221>; // RISCV vector tuple(min_num_elts=8, nf=6) -def riscv_nxv8i8x7 : VTVecTup<448, 7, i8, 222>; // RISCV vector tuple(min_num_elts=8, nf=7) -def riscv_nxv8i8x8 : VTVecTup<512, 8, i8, 223>; // RISCV vector tuple(min_num_elts=8, nf=8) -def riscv_nxv16i8x2 : VTVecTup<256, 2, i8, 224>; // RISCV vector tuple(min_num_elts=16, nf=2) -def riscv_nxv16i8x3 : VTVecTup<384, 3, i8, 225>; // RISCV vector tuple(min_num_elts=16, nf=3) -def riscv_nxv16i8x4 : VTVecTup<512, 4, i8, 226>; // RISCV vector tuple(min_num_elts=16, nf=4) -def riscv_nxv32i8x2 : VTVecTup<512, 2, i8, 227>; // RISCV vector tuple(min_num_elts=32, nf=2) - -def x86mmx : ValueType<64, 228>; // X86 MMX value -def Glue : ValueType<0, 229>; // Pre-RA sched glue -def isVoid : ValueType<0, 230>; // Produces no value -def untyped : ValueType<8, 231> { // Produces an untyped value +def riscv_nxv1i8x2 : VTVecTup<16, 2, i8, 208>; // RISCV vector tuple(min_num_elts=1, nf=2) +def riscv_nxv1i8x3 : VTVecTup<24, 3, i8, 209>; // RISCV vector tuple(min_num_elts=1, nf=3) +def riscv_nxv1i8x4 : VTVecTup<32, 4, i8, 210>; // RISCV vector tuple(min_num_elts=1, nf=4) +def riscv_nxv1i8x5 : VTVecTup<40, 5, i8, 211>; // RISCV vector tuple(min_num_elts=1, nf=5) +def riscv_nxv1i8x6 : VTVecTup<48, 6, i8, 212>; // RISCV vector tuple(min_num_elts=1, nf=6) +def riscv_nxv1i8x7 : VTVecTup<56, 7, i8, 213>; // RISCV vector tuple(min_num_elts=1, nf=7) +def riscv_nxv1i8x8 : VTVecTup<64, 8, i8, 214>; // RISCV vector tuple(min_num_elts=1, nf=8) +def riscv_nxv2i8x2 : VTVecTup<32, 2, i8, 215>; // RISCV vector tuple(min_num_elts=2, nf=2) +def riscv_nxv2i8x3 : VTVecTup<48, 3, i8, 216>; // RISCV vector tuple(min_num_elts=2, nf=3) +def riscv_nxv2i8x4 : VTVecTup<64, 4, i8, 217>; // RISCV vector tuple(min_num_elts=2, nf=4) +def riscv_nxv2i8x5 : VTVecTup<80, 5, i8, 218>; // RISCV vector tuple(min_num_elts=2, nf=5) +def riscv_nxv2i8x6 : VTVecTup<96, 6, i8, 219>; // RISCV vector tuple(min_num_elts=2, nf=6) +def riscv_nxv2i8x7 : VTVecTup<112, 7, i8, 220>; // RISCV vector tuple(min_num_elts=2, nf=7) +def riscv_nxv2i8x8 : VTVecTup<128, 8, i8, 221>; // RISCV vector tuple(min_num_elts=2, nf=8) +def riscv_nxv4i8x2 : VTVecTup<64, 2, i8, 222>; // RISCV vector tuple(min_num_elts=4, nf=2) +def riscv_nxv4i8x3 : VTVecTup<96, 3, i8, 223>; // RISCV vector tuple(min_num_elts=4, nf=3) +def riscv_nxv4i8x4 : VTVecTup<128, 4, i8, 224>; // RISCV vector tuple(min_num_elts=4, nf=4) +def riscv_nxv4i8x5 : VTVecTup<160, 5, i8, 225>; // RISCV vector tuple(min_num_elts=4, nf=5) +def riscv_nxv4i8x6 : VTVecTup<192, 6, i8, 226>; // RISCV vector tuple(min_num_elts=4, nf=6) +def riscv_nxv4i8x7 : VTVecTup<224, 7, i8, 227>; // RISCV vector tuple(min_num_elts=4, nf=7) +def riscv_nxv4i8x8 : VTVecTup<256, 8, i8, 228>; // RISCV vector tuple(min_num_elts=4, nf=8) +def riscv_nxv8i8x2 : VTVecTup<128, 2, i8, 229>; // RISCV vector tuple(min_num_elts=8, 
nf=2) +def riscv_nxv8i8x3 : VTVecTup<192, 3, i8, 230>; // RISCV vector tuple(min_num_elts=8, nf=3) +def riscv_nxv8i8x4 : VTVecTup<256, 4, i8, 231>; // RISCV vector tuple(min_num_elts=8, nf=4) +def riscv_nxv8i8x5 : VTVecTup<320, 5, i8, 232>; // RISCV vector tuple(min_num_elts=8, nf=5) +def riscv_nxv8i8x6 : VTVecTup<384, 6, i8, 233>; // RISCV vector tuple(min_num_elts=8, nf=6) +def riscv_nxv8i8x7 : VTVecTup<448, 7, i8, 234>; // RISCV vector tuple(min_num_elts=8, nf=7) +def riscv_nxv8i8x8 : VTVecTup<512, 8, i8, 235>; // RISCV vector tuple(min_num_elts=8, nf=8) +def riscv_nxv16i8x2 : VTVecTup<256, 2, i8, 236>; // RISCV vector tuple(min_num_elts=16, nf=2) +def riscv_nxv16i8x3 : VTVecTup<384, 3, i8, 237>; // RISCV vector tuple(min_num_elts=16, nf=3) +def riscv_nxv16i8x4 : VTVecTup<512, 4, i8, 238>; // RISCV vector tuple(min_num_elts=16, nf=4) +def riscv_nxv32i8x2 : VTVecTup<512, 2, i8, 239>; // RISCV vector tuple(min_num_elts=32, nf=2) + +def x86mmx : ValueType<64, 240>; // X86 MMX value +def Glue : ValueType<0, 241>; // Pre-RA sched glue +def isVoid : ValueType<0, 242>; // Produces no value +def untyped : ValueType<8, 243> { // Produces an untyped value let LLVMName = "Untyped"; } -def funcref : ValueType<0, 232>; // WebAssembly's funcref type -def externref : ValueType<0, 233>; // WebAssembly's externref type -def exnref : ValueType<0, 234>; // WebAssembly's exnref type -def x86amx : ValueType<8192, 235>; // X86 AMX value -def i64x8 : ValueType<512, 236>; // 8 Consecutive GPRs (AArch64) +def funcref : ValueType<0, 244>; // WebAssembly's funcref type +def externref : ValueType<0, 245>; // WebAssembly's externref type +def exnref : ValueType<0, 246>; // WebAssembly's exnref type +def x86amx : ValueType<8192, 247>; // X86 AMX value +def i64x8 : ValueType<512, 248>; // 8 Consecutive GPRs (AArch64) def aarch64svcount - : ValueType<16, 237>; // AArch64 predicate-as-counter -def spirvbuiltin : ValueType<0, 238>; // SPIR-V's builtin type + : ValueType<16, 249>; // AArch64 predicate-as-counter +def spirvbuiltin : ValueType<0, 250>; // SPIR-V's builtin type // AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation. // FIXME: Remove this and the getPointerType() override if MVT::i160 is added. -def amdgpuBufferFatPointer : ValueType<160, 239>; +def amdgpuBufferFatPointer : ValueType<160, 251>; // AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR. // FIXME: Remove this and the getPointerType() override if MVT::i82 is added. -def amdgpuBufferStridedPointer : ValueType<192, 240>; +def amdgpuBufferStridedPointer : ValueType<192, 252>; -def aarch64mfp8 : ValueType<8, 241>; // 8-bit value in FPR (AArch64) +def aarch64mfp8 : ValueType<8, 253>; // 8-bit value in FPR (AArch64) let isNormalValueType = false in { def token : ValueType<0, 504>; // TokenTy diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h new file mode 100644 index 0000000..70a2eeb --- /dev/null +++ b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h @@ -0,0 +1,162 @@ +//===- HLSLBinding.h - Representation for resource bindings in HLSL -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains objects to represent resource bindings. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FRONTEND_HLSL_HLSLBINDING_H +#define LLVM_FRONTEND_HLSL_HLSLBINDING_H + +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DXILABI.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace hlsl { + +/// BindingInfo represents the ranges of bindings and free space for each +/// `dxil::ResourceClass`. This can represent HLSL-level bindings as well as +/// bindings described in root signatures, and can be used for analysis of +/// overlapping or missing bindings as well as for finding space for implicit +/// bindings. +/// +/// As an example, given these resource bindings: +/// +/// RWBuffer<float> A[10] : register(u3); +/// RWBuffer<float> B[] : register(u5, space2) +/// +/// The binding info for UAV bindings should look like this: +/// +/// UAVSpaces { +/// ResClass = ResourceClass::UAV, +/// Spaces = { +/// { Space = 0u, FreeRanges = {{ 0u, 2u }, { 13u, ~0u }} }, +/// { Space = 2u, FreeRanges = {{ 0u, 4u }} } +/// } +/// } +class BindingInfo { +public: + struct BindingRange { + uint32_t LowerBound; + uint32_t UpperBound; + BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {} + }; + + struct RegisterSpace { + uint32_t Space; + SmallVector<BindingRange> FreeRanges; + RegisterSpace(uint32_t Space) : Space(Space) { + FreeRanges.emplace_back(0, ~0u); + } + // Size == -1 means unbounded array + LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size); + }; + + struct BindingSpaces { + dxil::ResourceClass RC; + llvm::SmallVector<RegisterSpace> Spaces; + BindingSpaces(dxil::ResourceClass RC) : RC(RC) {} + LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space); + }; + +private: + BindingSpaces SRVSpaces{dxil::ResourceClass::SRV}; + BindingSpaces UAVSpaces{dxil::ResourceClass::UAV}; + BindingSpaces CBufferSpaces{dxil::ResourceClass::CBuffer}; + BindingSpaces SamplerSpaces{dxil::ResourceClass::Sampler}; + +public: + BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: + return SRVSpaces; + case dxil::ResourceClass::UAV: + return UAVSpaces; + case dxil::ResourceClass::CBuffer: + return CBufferSpaces; + case dxil::ResourceClass::Sampler: + return SamplerSpaces; + } + + llvm_unreachable("Invalid resource class"); + } + const BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) const { + return const_cast<BindingInfo *>(this)->getBindingSpaces(RC); + } + + // Size == -1 means unbounded array + LLVM_ABI std::optional<uint32_t> + findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size); + + friend class BindingInfoBuilder; +}; + +/// Builder class for creating a /c BindingInfo. 
+class BindingInfoBuilder { +public: + struct Binding { + dxil::ResourceClass RC; + uint32_t Space; + uint32_t LowerBound; + uint32_t UpperBound; + const void *Cookie; + + Binding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound, + uint32_t UpperBound, const void *Cookie) + : RC(RC), Space(Space), LowerBound(LowerBound), UpperBound(UpperBound), + Cookie(Cookie) {} + + bool isUnbounded() const { return UpperBound == ~0U; } + + bool operator==(const Binding &RHS) const { + return std::tie(RC, Space, LowerBound, UpperBound, Cookie) == + std::tie(RHS.RC, RHS.Space, RHS.LowerBound, RHS.UpperBound, + RHS.Cookie); + } + bool operator!=(const Binding &RHS) const { return !(*this == RHS); } + + bool operator<(const Binding &RHS) const { + return std::tie(RC, Space, LowerBound) < + std::tie(RHS.RC, RHS.Space, RHS.LowerBound); + } + }; + +private: + SmallVector<Binding> Bindings; + +public: + void trackBinding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound, + uint32_t UpperBound, const void *Cookie) { + Bindings.emplace_back(RC, Space, LowerBound, UpperBound, Cookie); + } + /// Calculate the binding info - \c ReportOverlap will be called once for each + /// overlapping binding. + BindingInfo calculateBindingInfo( + llvm::function_ref<void(const BindingInfoBuilder &Builder, + const Binding &Overlapping)> + ReportOverlap); + + /// Calculate the binding info - \c HasOverlap will be set to indicate whether + /// there are any overlapping bindings. + BindingInfo calculateBindingInfo(bool &HasOverlap) { + HasOverlap = false; + return calculateBindingInfo( + [&HasOverlap](auto, auto) { HasOverlap = true; }); + } + + /// For use in the \c ReportOverlap callback of \c calculateBindingInfo - + /// finds a binding that the \c ReportedBinding overlaps with. 
+ const Binding &findOverlapping(const Binding &ReportedBinding) const; +}; + +} // namespace hlsl +} // namespace llvm + +#endif // LLVM_FRONTEND_HLSL_HLSLBINDING_H diff --git a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h index 6fa51ed..0bd0774 100644 --- a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h +++ b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h @@ -14,6 +14,7 @@ #ifndef LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H #define LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H +#include "llvm/ADT/StringRef.h" #include "llvm/Frontend/HLSL/HLSLRootSignature.h" #include "llvm/IR/Constants.h" #include "llvm/MC/DXContainerRootSignature.h" @@ -26,6 +27,80 @@ class Metadata; namespace hlsl { namespace rootsig { +template <typename T> +class RootSignatureValidationError + : public ErrorInfo<RootSignatureValidationError<T>> { +public: + static char ID; + StringRef ParamName; + T Value; + + RootSignatureValidationError(StringRef ParamName, T Value) + : ParamName(ParamName), Value(Value) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid value for " << ParamName << ": " << Value; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class GenericRSMetadataError : public ErrorInfo<GenericRSMetadataError> { +public: + static char ID; + StringRef Message; + MDNode *MD; + + GenericRSMetadataError(StringRef Message, MDNode *MD) + : Message(Message), MD(MD) {} + + void log(raw_ostream &OS) const override { + OS << Message; + if (MD) { + OS << "\n"; + MD->printTree(OS); + } + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class InvalidRSMetadataFormat : public ErrorInfo<InvalidRSMetadataFormat> { +public: + static char ID; + StringRef ElementName; + + InvalidRSMetadataFormat(StringRef ElementName) : ElementName(ElementName) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid format for " << ElementName; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class InvalidRSMetadataValue : public ErrorInfo<InvalidRSMetadataValue> { +public: + static char ID; + StringRef ParamName; + + InvalidRSMetadataValue(StringRef ParamName) : ParamName(ParamName) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid value for " << ParamName; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + class MetadataBuilder { public: MetadataBuilder(llvm::LLVMContext &Ctx, ArrayRef<RootElement> Elements) @@ -66,29 +141,27 @@ class MetadataParser { public: MetadataParser(MDNode *Root) : Root(Root) {} - LLVM_ABI bool ParseRootSignature(LLVMContext *Ctx, - mcdxbc::RootSignatureDesc &RSD); + LLVM_ABI llvm::Expected<llvm::mcdxbc::RootSignatureDesc> + ParseRootSignature(uint32_t Version); private: - bool parseRootFlags(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootFlagNode); - bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootConstantNode); - bool parseRootDescriptors(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootDescriptorNode, - RootSignatureElementKind ElementKind); - bool parseDescriptorRange(LLVMContext *Ctx, mcdxbc::DescriptorTable &Table, - MDNode *RangeDescriptorNode); - bool parseDescriptorTable(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *DescriptorTableNode); - bool 
parseRootSignatureElement(LLVMContext *Ctx, - mcdxbc::RootSignatureDesc &RSD, - MDNode *Element); - bool parseStaticSampler(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *StaticSamplerNode); - - bool validateRootSignature(LLVMContext *Ctx, - const llvm::mcdxbc::RootSignatureDesc &RSD); + llvm::Error parseRootFlags(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootFlagNode); + llvm::Error parseRootConstants(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootConstantNode); + llvm::Error parseRootDescriptors(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootDescriptorNode, + RootSignatureElementKind ElementKind); + llvm::Error parseDescriptorRange(mcdxbc::DescriptorTable &Table, + MDNode *RangeDescriptorNode); + llvm::Error parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD, + MDNode *DescriptorTableNode); + llvm::Error parseRootSignatureElement(mcdxbc::RootSignatureDesc &RSD, + MDNode *Element); + llvm::Error parseStaticSampler(mcdxbc::RootSignatureDesc &RSD, + MDNode *StaticSamplerNode); + + llvm::Error validateRootSignature(const llvm::mcdxbc::RootSignatureDesc &RSD); MDNode *Root; }; diff --git a/llvm/include/llvm/Frontend/Offloading/PropertySet.h b/llvm/include/llvm/Frontend/Offloading/PropertySet.h new file mode 100644 index 0000000..d198d3e --- /dev/null +++ b/llvm/include/llvm/Frontend/Offloading/PropertySet.h @@ -0,0 +1,33 @@ +///===- llvm/Frontend/Offloading/PropertySet.h ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +///===---------------------------------------------------------------------===// +/// \file This file defines PropertySetRegistry and PropertyValue types and +/// provides helper functions to translate PropertySetRegistry from/to JSON. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Error.h" + +#include <map> +#include <variant> + +namespace llvm { +class raw_ostream; +class MemoryBufferRef; + +namespace offloading { + +using ByteArray = SmallVector<unsigned char, 0>; +using PropertyValue = std::variant<uint32_t, ByteArray>; +using PropertySet = std::map<std::string, PropertyValue>; +using PropertySetRegistry = std::map<std::string, PropertySet>; + +void writePropertiesToJSON(const PropertySetRegistry &P, raw_ostream &O); +Expected<PropertySetRegistry> readPropertiesFromJSON(MemoryBufferRef Buf); + +} // namespace offloading +} // namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 7919f7a..ce1cedc 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -578,8 +578,9 @@ struct DynamicAllocatorsT { template <typename T, typename I, typename E> // struct EnterT { using List = ObjectListT<I, E>; - using WrapperTrait = std::true_type; - List v; + ENUM(Modifier, Automap); + using TupleTrait = std::true_type; + std::tuple<OPT(Modifier), List> t; }; // V5.2: [5.6.2] `exclusive` clause diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 1b94657..79f25bb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -179,7 +179,7 @@ def OMPC_DynamicAllocators : Clause<[Spelling<"dynamic_allocators">]> { let clangClass = "OMPDynamicAllocatorsClause"; } def OMPC_Enter : Clause<[Spelling<"enter">]> { - let flangClass = "OmpObjectList"; + let flangClass = "OmpEnterClause"; } def OMPC_Exclusive : Clause<[Spelling<"exclusive">]> { let clangClass = "OMPExclusiveClause"; @@ -1460,7 +1460,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> { ]; let allowedOnceClauses = [ VersionedClause<OMPC_Collapse>, - VersionedClause<OMPC_If>, + VersionedClause<OMPC_If, 50>, VersionedClause<OMPC_NoWait>, VersionedClause<OMPC_Order, 50>, VersionedClause<OMPC_Ordered>, diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 0318427..2e13896 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -996,14 +996,6 @@ public: return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); } - /// FIXME: Remove this function once transition to Align is over. - /// Use getDestAlign() instead. - LLVM_DEPRECATED("Use getDestAlign() instead", "getDestAlign") - unsigned getDestAlignment() const { - if (auto MA = getParamAlign(ARG_DEST)) - return MA->value(); - return 0; - } MaybeAlign getDestAlign() const { return getParamAlign(ARG_DEST); } /// Set the specified arguments of the instruction. @@ -1057,15 +1049,6 @@ public: return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); } - /// FIXME: Remove this function once transition to Align is over. - /// Use getSourceAlign() instead. 
- LLVM_DEPRECATED("Use getSourceAlign() instead", "getSourceAlign") - unsigned getSourceAlignment() const { - if (auto MA = BaseCL::getParamAlign(ARG_SOURCE)) - return MA->value(); - return 0; - } - MaybeAlign getSourceAlign() const { return BaseCL::getParamAlign(ARG_SOURCE); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 1da4e36..469bdb4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -593,6 +593,14 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">; + +def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic< + [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">; + def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; @@ -601,32 +609,108 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; -class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pk_fp8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">; + +def int_amdgcn_cvt_pk_bf8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">; + +// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. +def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">; + +// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. 
+def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; + +// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..7] +class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; -class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; +def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">; +def int_amdgcn_cvt_scale_pk8_bf16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_fp8">; +def int_amdgcn_cvt_scale_pk8_f16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_bf8">; +def int_amdgcn_cvt_scale_pk8_bf16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_bf8">; +def int_amdgcn_cvt_scale_pk8_f16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_i32_ty, "cvt_scale_pk8_f16_fp4">; +def int_amdgcn_cvt_scale_pk8_bf16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_i32_ty, "cvt_scale_pk8_bf16_fp4">; +def int_amdgcn_cvt_scale_pk8_f32_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_fp8">; +def int_amdgcn_cvt_scale_pk8_f32_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_bf8">; +def int_amdgcn_cvt_scale_pk8_f32_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_i32_ty, "cvt_scale_pk8_f32_fp4">; +def int_amdgcn_cvt_scale_pk16_f16_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty, llvm_v3i32_ty, "cvt_scale_pk16_f16_bf6">; +def int_amdgcn_cvt_scale_pk16_bf16_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, "cvt_scale_pk16_bf16_bf6">; +def int_amdgcn_cvt_scale_pk16_f16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty, llvm_v3i32_ty, "cvt_scale_pk16_f16_fp6">; +def int_amdgcn_cvt_scale_pk16_bf16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, "cvt_scale_pk16_bf16_fp6">; +def int_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_fp6">; +def int_amdgcn_cvt_scale_pk16_f32_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_bf6">; + +class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_"#name>; + def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">; def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_bf6_f16">; +def 
int_amdgcn_cvt_scalef32_pk8_fp8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp8_bf16">; +def int_amdgcn_cvt_scalef32_pk8_bf8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_bf8_bf16">; def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">; def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_bf6_bf16">; +def int_amdgcn_cvt_scalef32_pk8_fp8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp8_f16">; +def int_amdgcn_cvt_scalef32_pk8_bf8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_bf8_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp8_f32">; +def int_amdgcn_cvt_scalef32_pk8_bf8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_bf8_f32">; +def int_amdgcn_cvt_scalef32_pk8_fp4_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp4_f32">; +def int_amdgcn_cvt_scalef32_pk8_fp4_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp4_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp4_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp4_bf16">; +def int_amdgcn_cvt_scalef32_pk16_fp6_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_pk16_fp6_f32">; +def int_amdgcn_cvt_scalef32_pk16_bf6_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_pk16_bf6_f32">; +def int_amdgcn_cvt_scalef32_pk16_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_pk16_fp6_f16">; +def int_amdgcn_cvt_scalef32_pk16_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_pk16_bf6_f16">; +def int_amdgcn_cvt_scalef32_pk16_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_pk16_fp6_bf16">; +def int_amdgcn_cvt_scalef32_pk16_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_pk16_bf6_bf16">; + +def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_fp8_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_bf8_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp8_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_fp8_f16">; +def int_amdgcn_cvt_scalef32_sr_pk8_bf8_f16 : 
AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_bf8_f16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp8_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_fp8_f32">; +def int_amdgcn_cvt_scalef32_sr_pk8_bf8_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_bf8_f32">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp4_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_fp4_f32">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp4_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_fp4_f16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_fp4_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk16_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_sr_pk16_fp6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk16_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_sr_pk16_bf6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk16_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_sr_pk16_fp6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk16_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_sr_pk16_bf6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_sr_pk16_fp6_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_sr_pk16_bf6_bf16">; + def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">; def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">; -def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">; -def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">; -def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">; -def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">; -def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">; -def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">; - class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic< [DstTy], [llvm_i32_ty, // src @@ -3473,6 +3557,12 @@ def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel +def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; + // llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] // byte_sel selects byte to 
write into vdst. def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, @@ -3486,6 +3576,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.sr.fp8.f32.e5m3 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32_e5m3">, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; + // llvm.amdgcn.cvt.off.fp32.i4 int srcA def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">, DefaultAttrsIntrinsic<[llvm_float_ty], @@ -3597,6 +3693,48 @@ def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">, def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; +// llvm.amdgcn.permlane.bcast <src0> <src1> <src2> +def int_amdgcn_permlane_bcast : ClangBuiltin<"__builtin_amdgcn_permlane_bcast">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.up <src0> <src1> <src2> +def int_amdgcn_permlane_up : ClangBuiltin<"__builtin_amdgcn_permlane_up">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.down <src0> <src1> <src2> +def int_amdgcn_permlane_down : ClangBuiltin<"__builtin_amdgcn_permlane_down">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.xor <src0> <src1> <src2> +def int_amdgcn_permlane_xor : ClangBuiltin<"__builtin_amdgcn_permlane_xor">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.idx.gen <src0> <src1> +def int_amdgcn_permlane_idx_gen : ClangBuiltin<"__builtin_amdgcn_permlane_idx_gen">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +def int_amdgcn_perm_pk16_b4_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b4_u4">, + DefaultAttrsIntrinsic<[llvm_v2i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_amdgcn_perm_pk16_b6_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b6_u4">, + DefaultAttrsIntrinsic<[llvm_v3i32_ty], [llvm_i32_ty, llvm_i64_ty, llvm_v2i32_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. 
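A note on the scaled-conversion intrinsics added earlier in this file's diff: they are non-overloaded, so a pass or frontend can emit them directly by ID. Below is a minimal sketch, assuming the standard IRBuilder::CreateIntrinsic API; the helper name and operand choices are illustrative, and scale_sel must be a compile-time constant because of the ImmArg<ArgIndex<2>> attribute.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include <cassert>

using namespace llvm;

// Illustrative helper: convert eight packed FP8 values (held in a v2i32)
// to v8f16 via llvm.amdgcn.cvt.scale.pk8.f16.fp8. Src is v2i32, Scale is
// i32, and ScaleSel selects the scale byte (immediate in [0..7]).
static Value *emitCvtScalePk8F16Fp8(IRBuilder<> &B, Value *Src, Value *Scale,
                                    unsigned ScaleSel) {
  assert(ScaleSel <= 7 && "scale_sel is an immediate in [0..7]");
  return B.CreateIntrinsic(Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8,
                           /*Types=*/{}, {Src, Scale, B.getInt32(ScaleSel)});
}

The ImmArg attribute is what lets the IR verifier reject a non-constant scale_sel, matching the "imm scale_sel [0..7]" contract stated in the TableGen comment above.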
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index e63a41f..99f975f 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1717,6 +1717,16 @@ let TargetPrefix = "riscv" in { llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (pointer, offset, mask, vl) + def int_riscv_sseg # nf # _load_mask + : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [llvm_anyptr_ty, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (<stored values>..., pointer, mask, vl) def int_riscv_seg # nf # _store_mask : DefaultAttrsIntrinsic<[], diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index d8e632b..323c478 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -542,21 +542,23 @@ private: ArrayRef<SymbolResolution> Res, unsigned Partition, bool InSummary); - // These functions take a range of symbol resolutions [ResI, ResE) and consume - // the resolutions used by a single input module by incrementing ResI. After - // these functions return, [ResI, ResE) will refer to the resolution range for - // the remaining modules in the InputFile. - Error addModule(InputFile &Input, unsigned ModI, - const SymbolResolution *&ResI, const SymbolResolution *ResE); - - Expected<RegularLTOState::AddedModule> - addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + // These functions take a range of symbol resolutions and consume the + // resolutions used by a single input module. They return ranges referring + // to the resolutions for the remaining modules in the InputFile. + Expected<ArrayRef<SymbolResolution>> + addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes, + unsigned ModI, ArrayRef<SymbolResolution> Res); + + Expected<std::pair<RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>> + addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes, + BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res); Error linkRegularLTO(RegularLTOState::AddedModule Mod, bool LivenessFromIndex); - Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + Expected<ArrayRef<SymbolResolution>> + addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res); Error runRegularLTO(AddStreamFn AddStream); Error runThinLTO(AddStreamFn AddStream, FileCache Cache, diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index bfc1175..311d9ca 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -117,14 +117,13 @@ public: void maybeAddReloc(const MCFragment &, const MCFixup &, const MCValue &, uint64_t &Value, bool IsResolved); - /// Determine if a relocation is required. In addition, - /// Apply the \p Value for given \p Fixup into the provided data fragment, at - /// the offset specified by the fixup and following the fixup kind as - /// appropriate. Errors (such as an out of range fixup value) should be - /// reported via \p Ctx. + // Determine if a relocation is required. In addition, apply `Value` to the + // `Data` fragment at the specified fixup offset if applicable.
`Data` points + // to the first byte of the fixup offset, which may be at the content's end if + // the fixup is zero-sized. virtual void applyFixup(const MCFragment &, const MCFixup &, - const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsResolved) = 0; + const MCValue &Target, uint8_t *Data, uint64_t Value, + bool IsResolved) = 0; /// @} diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index 4853701..ffde5ca2 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -99,8 +99,7 @@ private: /// \param RecordReloc Record relocation if needed. /// relocation. bool evaluateFixup(const MCFragment &F, MCFixup &Fixup, MCValue &Target, - uint64_t &Value, bool RecordReloc, - MutableArrayRef<char> Contents) const; + uint64_t &Value, bool RecordReloc, uint8_t *Data) const; /// Check whether a fixup can be satisfied, or whether it needs to be relaxed /// (increased in size, in order to hold its value correctly). diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 5ce58ae..d69560c 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -69,6 +69,9 @@ protected: /// Language Specific Data Area information is emitted to. MCSection *LSDASection = nullptr; + /// Section containing call graph metadata. + MCSection *CallGraphSection = nullptr; + /// If exception handling is supported by the target and the target can /// support a compact representation of the CIE and FDE, this is the section /// to emit them into. @@ -359,6 +362,8 @@ public: MCSection *getFaultMapSection() const { return FaultMapSection; } MCSection *getRemarksSection() const { return RemarksSection; } + MCSection *getCallGraphSection(const MCSection &TextSec) const; + MCSection *getStackSizesSection(const MCSection &TextSec) const; MCSection *getBBAddrMapSection(const MCSection &TextSec) const; diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index 5ac7aba..eb875a7 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -52,7 +52,7 @@ class MCObjectStreamer : public MCStreamer { DenseMap<const MCSymbol *, SmallVector<PendingAssignment, 1>> pendingAssignments; - SmallVector<std::unique_ptr<char[]>, 0> FragStorage; + SmallVector<std::unique_ptr<uint8_t[]>, 0> FragStorage; // Available bytes in the current block for trailing data or new fragments. size_t FragSpace = 0; @@ -88,8 +88,8 @@ public: // Add a fragment with a variable-size tail and start a new empty fragment. void insert(MCFragment *F); - char *getCurFragEnd() const { - return reinterpret_cast<char *>(CurFrag + 1) + CurFrag->getFixedSize(); + uint8_t *getCurFragEnd() const { + return reinterpret_cast<uint8_t *>(CurFrag + 1) + CurFrag->getFixedSize(); } MCFragment *allocFragSpace(size_t Headroom); // Add a new fragment to the current section without a variable-size tail. @@ -97,7 +97,7 @@ public: void ensureHeadroom(size_t Headroom); void appendContents(ArrayRef<char> Contents); - void appendContents(size_t Num, char Elt); + void appendContents(size_t Num, uint8_t Elt); // Add a fixup to the current fragment. Call ensureHeadroom beforehand to // ensure the fixup and appended content apply to the same fragment. 
void addFixup(const MCExpr *Value, MCFixupKind Kind); diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 2e929d8..4022ea7 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -80,19 +80,22 @@ private: FragmentType Kind; -protected: + //== Used by certain fragment types for better packing. + + // The number of fixups for the optional variable-size tail must be small. + uint8_t VarFixupSize = 0; + bool LinkerRelaxable : 1; - /// Used by certain fragment types for better packing. - /// /// FT_Data, FT_Relaxable bool HasInstructions : 1; /// FT_Relaxable, x86-specific bool AllowAutoPadding : 1; // Track content and fixups for the fixed-size part as fragments are - // appended to the section. The content remains immutable, except when - // modified by applyFixup. + // appended to the section. The content is stored as trailing data of the + // MCFragment. The content remains immutable, except when modified by + // applyFixup. uint32_t FixedSize = 0; uint32_t FixupStart = 0; uint32_t FixupEnd = 0; @@ -102,7 +105,6 @@ protected: uint32_t VarContentStart = 0; uint32_t VarContentEnd = 0; uint32_t VarFixupStart = 0; - uint32_t VarFixupEnd = 0; const MCSubtargetInfo *STI = nullptr; @@ -296,13 +298,8 @@ public: } }; -/// Interface implemented by fragments that contain encoded instructions and/or -/// data. -class MCEncodedFragment : public MCFragment { -protected: - MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions) - : MCFragment(FType, HasInstructions) {} -}; +// MCFragment subclasses do not use the fixed-size part or variable-size tail of +// MCFragment. Instead, they encode content in a specialized way. class MCFillFragment : public MCFragment { uint8_t ValueSize; @@ -318,7 +315,7 @@ class MCFillFragment : public MCFragment { public: MCFillFragment(uint64_t Value, uint8_t VSize, const MCExpr &NumValues, SMLoc Loc) - : MCFragment(FT_Fill, false), ValueSize(VSize), Value(Value), + : MCFragment(FT_Fill), ValueSize(VSize), Value(Value), NumValues(NumValues), Loc(Loc) {} uint64_t getValue() const { return Value; } @@ -349,7 +346,7 @@ class MCNopsFragment : public MCFragment { public: MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L, const MCSubtargetInfo &STI) - : MCFragment(FT_Nops, false), Size(NumBytes), + : MCFragment(FT_Nops), Size(NumBytes), ControlledNopLength(ControlledNopLength), Loc(L), STI(STI) {} int64_t getNumBytes() const { return Size; } @@ -376,7 +373,7 @@ class MCOrgFragment : public MCFragment { public: MCOrgFragment(const MCExpr &Offset, int8_t Value, SMLoc Loc) - : MCFragment(FT_Org, false), Value(Value), Offset(&Offset), Loc(Loc) {} + : MCFragment(FT_Org), Value(Value), Offset(&Offset), Loc(Loc) {} const MCExpr &getOffset() const { return *Offset; } @@ -394,8 +391,7 @@ class MCSymbolIdFragment : public MCFragment { const MCSymbol *Sym; public: - MCSymbolIdFragment(const MCSymbol *Sym) - : MCFragment(FT_SymbolId, false), Sym(Sym) {} + MCSymbolIdFragment(const MCSymbol *Sym) : MCFragment(FT_SymbolId), Sym(Sym) {} const MCSymbol *getSymbol() { return Sym; } const MCSymbol *getSymbol() const { return Sym; } @@ -407,7 +403,7 @@ public: /// Fragment representing the binary annotations produced by the /// .cv_inline_linetable directive. 
-class MCCVInlineLineTableFragment : public MCEncodedFragment { +class MCCVInlineLineTableFragment : public MCFragment { unsigned SiteFuncId; unsigned StartFileId; unsigned StartLineNum; @@ -422,7 +418,7 @@ public: MCCVInlineLineTableFragment(unsigned SiteFuncId, unsigned StartFileId, unsigned StartLineNum, const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) - : MCEncodedFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId), + : MCFragment(FT_CVInlineLines), SiteFuncId(SiteFuncId), StartFileId(StartFileId), StartLineNum(StartLineNum), FnStartSym(FnStartSym), FnEndSym(FnEndSym) {} @@ -435,7 +431,7 @@ public: }; /// Fragment representing the .cv_def_range directive. -class MCCVDefRangeFragment : public MCEncodedFragment { +class MCCVDefRangeFragment : public MCFragment { ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges; StringRef FixedSizePortion; @@ -447,8 +443,7 @@ public: MCCVDefRangeFragment( ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges, StringRef FixedSizePortion) - : MCEncodedFragment(FT_CVDefRange, false), - Ranges(Ranges.begin(), Ranges.end()), + : MCFragment(FT_CVDefRange), Ranges(Ranges.begin(), Ranges.end()), FixedSizePortion(FixedSizePortion) {} ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> getRanges() const { @@ -479,8 +474,7 @@ class MCBoundaryAlignFragment : public MCFragment { public: MCBoundaryAlignFragment(Align AlignBoundary, const MCSubtargetInfo &STI) - : MCFragment(FT_BoundaryAlign, false), AlignBoundary(AlignBoundary), - STI(STI) {} + : MCFragment(FT_BoundaryAlign), AlignBoundary(AlignBoundary), STI(STI) {} uint64_t getSize() const { return Size; } void setSize(uint64_t Value) { Size = Value; } @@ -650,11 +644,10 @@ inline ArrayRef<MCFixup> MCFragment::getFixups() const { inline MutableArrayRef<MCFixup> MCFragment::getVarFixups() { return MutableArrayRef(getParent()->FixupStorage) - .slice(VarFixupStart, VarFixupEnd - VarFixupStart); + .slice(VarFixupStart, VarFixupSize); } inline ArrayRef<MCFixup> MCFragment::getVarFixups() const { - return ArrayRef(getParent()->FixupStorage) - .slice(VarFixupStart, VarFixupEnd - VarFixupStart); + return ArrayRef(getParent()->FixupStorage).slice(VarFixupStart, VarFixupSize); } //== FT_Relaxable functions diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index e883f2f..3bf8c29 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -117,7 +117,7 @@ struct FileHeader { llvm::yaml::Hex8 ABIVersion; ELF_ET Type; std::optional<ELF_EM> Machine; - ELF_EF Flags; + std::optional<ELF_EF> Flags; llvm::yaml::Hex64 Entry; std::optional<StringRef> SectionHeaderStringTable; diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index 3f785bd..26baddd 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -33,11 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 4ULL +#define MEMPROF_RAW_VERSION 5ULL // Currently supported versions. 
-#define MEMPROF_RAW_SUPPORTED_VERSIONS \ - { 3ULL, 4ULL } +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) { } __attribute__((__packed__)); #endif +constexpr int MantissaBits = 12; +constexpr int ExponentBits = 4; +constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1; +constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1; +constexpr uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa) + << MaxExponent; + +// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format. +inline uint16_t encodeHistogramCount(uint64_t Count) { + if (Count == 0) + return 0; + + if (Count > MaxRepresentableValue) + Count = MaxRepresentableValue; + + if (Count <= MaxMantissa) + return Count; + + uint64_t M = Count; + uint16_t E = 0; + while (M > MaxMantissa) { + M = (M + 1) >> 1; + E++; + } + return (E << MantissaBits) | static_cast<uint16_t>(M); +} + +// Decodes a 16-bit scaled integer and returns the +// decoded 64-bit unsigned integer. +inline uint64_t decodeHistogramCount(uint16_t EncodedValue) { + const uint16_t E = EncodedValue >> MantissaBits; + const uint16_t M = EncodedValue & MaxMantissa; + return static_cast<uint64_t>(M) << E; +} + } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index 8fca2d5..a331295 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -61,8 +61,10 @@ namespace llvm { for (bool _c = \ (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ _c; _c = false) \ + for (::llvm::impl::RAIINewLineStream NewLineStream{(STREAM)}; _c; \ + _c = false) \ ::llvm::impl::raw_ldbg_ostream{ \ - ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)} \ + ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), NewLineStream} \ .asLvalue() #define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ @@ -81,14 +83,15 @@ namespace llvm { namespace impl { -/// A raw_ostream that tracks `\n` and print the prefix. +/// A raw_ostream that tracks `\n` and prints the prefix after each +/// newline. class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; - bool HasPendingNewline = true; + bool HasPendingNewline; - /// Split the line on newlines and insert the prefix before each newline. - /// Forward everything to the underlying stream. + /// Split the line on newlines and insert the prefix before each + /// newline. Forward everything to the underlying stream. void write_impl(const char *Ptr, size_t Size) final { auto Str = StringRef(Ptr, Size); // Handle the initial prefix.
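Back on the MemProfData.inc change above: the version 5 raw format packs each histogram count into 16 bits as a 4-bit exponent plus a 12-bit mantissa. Counts up to 4095 round-trip exactly; larger counts are rounded upward (each halving step computes (M + 1) >> 1) and saturate at 4095 << 15. A small self-checking sketch of the round-trip behavior, assuming the .inc (which is shared with compiler-rt) can be included into a translation unit that already has <cstdint>:

#include <cstdint>
#include "llvm/ProfileData/MemProfData.inc"
#include <cassert>

int main() {
  using namespace llvm::memprof;
  // Values that fit in the 12-bit mantissa are exact.
  assert(decodeHistogramCount(encodeHistogramCount(4095)) == 4095);
  // 4096 is still exact: mantissa 2048, exponent 1.
  assert(decodeHistogramCount(encodeHistogramCount(4096)) == 4096);
  // 10001 rounds up to 2501 << 2 == 10004.
  assert(decodeHistogramCount(encodeHistogramCount(10001)) == 10004);
  // Everything above MaxRepresentableValue saturates at 4095 << 15.
  assert(decodeHistogramCount(encodeHistogramCount(~0ULL)) == (4095ULL << 15));
  return 0;
}

Because every halving step rounds up, the decoded value is always greater than or equal to the original count (until saturation kicks in), so the compressed histogram never under-reports a bucket.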
@@ -109,22 +112,18 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { } void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); } void writeWithPrefix(StringRef Str) { - if (HasPendingNewline) { - emitPrefix(); - HasPendingNewline = false; - } + flushEol(); Os.write(Str.data(), Str.size()); } public: - explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os) - : Prefix(std::move(Prefix)), Os(Os) { + explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, + bool HasPendingNewline = true) + : Prefix(std::move(Prefix)), Os(Os), + HasPendingNewline(HasPendingNewline) { SetUnbuffered(); } - ~raw_ldbg_ostream() final { - flushEol(); - Os << '\n'; - } + ~raw_ldbg_ostream() final { flushEol(); } void flushEol() { if (HasPendingNewline) { emitPrefix(); @@ -135,10 +134,22 @@ public: /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } - /// Some of the `<<` operators expect an lvalue, so we trick the type system. + /// Some of the `<<` operators expect an lvalue, so we trick the type + /// system. raw_ldbg_ostream &asLvalue() { return *this; } }; +/// A raw_ostream that prints a newline on destruction, useful for LDBG() +class RAIINewLineStream final : public raw_ostream { + raw_ostream &Os; + +public: + RAIINewLineStream(raw_ostream &Os) : Os(Os) { SetUnbuffered(); } + ~RAIINewLineStream() { Os << '\n'; } + void write_impl(const char *Ptr, size_t Size) final { Os.write(Ptr, Size); } + uint64_t current_pos() const final { return Os.tell(); } +}; + /// Remove the path prefix from the file name. static LLVM_ATTRIBUTE_UNUSED constexpr const char * getShortFileName(const char *path) { diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 734b795..d976773 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -1069,7 +1069,7 @@ public: /// Redirect each of the remapped files from first to second. static std::unique_ptr<RedirectingFileSystem> create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, - bool UseExternalNames, FileSystem &ExternalFS); + bool UseExternalNames, IntrusiveRefCntPtr<FileSystem> ExternalFS); ErrorOr<Status> status(const Twine &Path) override; bool exists(const Twine &Path) override; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 719c0ee..e57032a 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6494,7 +6494,7 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { } constexpr static const std::optional<TypeSize> HasNoAllocationSize = - std::optional<TypeSize>(TypeSize(-1, true)); + std::make_optional<TypeSize>(-1, true); LLVM_ABI static const char ID; }; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index cba3736..43ff084 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -400,19 +400,11 @@ public: /// Returns true if the loop has exactly one uncountable early exit, i.e. an /// uncountable exit that isn't the latch block. 
- bool hasUncountableEarlyExit() const { - return getUncountableEdge().has_value(); - } + bool hasUncountableEarlyExit() const { return UncountableExitingBB; } /// Returns the uncountable early exiting block, if there is exactly one. BasicBlock *getUncountableEarlyExitingBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->first : nullptr; - } - - /// Returns the destination of the uncountable early exiting block, if there - /// is exactly one. - BasicBlock *getUncountableEarlyExitBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr; + return UncountableExitingBB; } /// Return true if there is store-load forwarding dependencies. @@ -473,13 +465,6 @@ public: return CountableExitingBlocks; } - /// Returns the loop edge to an uncountable exit, or std::nullopt if there - /// isn't a single such edge. - std::optional<std::pair<BasicBlock *, BasicBlock *>> - getUncountableEdge() const { - return UncountableEdge; - } - private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal @@ -659,9 +644,9 @@ private: /// the exact backedge taken count is not computable. SmallVector<BasicBlock *, 4> CountableExitingBlocks; - /// Keep track of the loop edge to an uncountable exit, comprising a pair - /// of (Exiting, Exit) blocks, if there is exactly one early exit. - std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge; + /// Keep track of an uncountable exiting block, if there is exactly one early + /// exit. + BasicBlock *UncountableExitingBB = nullptr; }; } // namespace llvm
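Finally, to see how the binding logic factored out into HLSLBinding.h earlier in this patch is meant to be driven, here is a usage sketch of hlsl::BindingInfoBuilder. The function name and register values are illustrative, chosen to mirror the example in the header's doc comment:

#include "llvm/Frontend/HLSL/HLSLBinding.h"
#include <optional>

using namespace llvm;

// Track the explicit binding `RWBuffer<float> A[10] : register(u3)`, then
// ask for an implicit UAV slot in space 0.
hlsl::BindingInfo computeExampleBindings() {
  hlsl::BindingInfoBuilder Builder;
  Builder.trackBinding(dxil::ResourceClass::UAV, /*Space=*/0,
                       /*LowerBound=*/3, /*UpperBound=*/12,
                       /*Cookie=*/nullptr);
  bool HasOverlap = false;
  hlsl::BindingInfo Info = Builder.calculateBindingInfo(HasOverlap);
  // Free ranges in space 0 are now {0..2} and {13..~0u}, so a one-element
  // implicit binding should land at register 0.
  std::optional<uint32_t> Slot =
      Info.findAvailableBinding(dxil::ResourceClass::UAV, /*Space=*/0,
                                /*Size=*/1);
  (void)Slot;
  return Info;
}

For diagnostics, the callback overload of calculateBindingInfo is invoked once per overlapping binding, and findOverlapping() can then recover the conflicting range; the Cookie pointer exists so a caller can map a reported Binding back to its source construct.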