diff options
Diffstat (limited to 'llvm/include/llvm')
61 files changed, 901 insertions, 355 deletions
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h index 88dbce9..a29aaa3 100644 --- a/llvm/include/llvm/ADT/Any.h +++ b/llvm/include/llvm/ADT/Any.h @@ -119,7 +119,6 @@ private: template <class T> friend T any_cast(Any &&Value); template <class T> friend const T *any_cast(const Any *Value); template <class T> friend T *any_cast(Any *Value); - template <typename T> friend bool any_isa(const Any &Value); std::unique_ptr<StorageBase> Storage; }; diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index 956dcbc..93c6bfb 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/HLSL/HLSLBinding.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/PassManager.h" @@ -633,86 +634,25 @@ LLVM_ABI ModulePass *createDXILResourceWrapperPassPass(); // register slots to resources with implicit bindings, and in a // post-optimization validation pass that will raise diagnostic about // overlapping bindings. 
-// -// For example for these resource bindings: -// -// RWBuffer<float> A[10] : register(u3); -// RWBuffer<float> B[] : register(u5, space2) -// -// The analysis result for UAV binding type will look like this: -// -// UAVSpaces { -// ResClass = ResourceClass::UAV, -// Spaces = { -// { Space = 0, FreeRanges = {{ 0, 2 }, { 13, UINT32_MAX }} }, -// { Space = 2, FreeRanges = {{ 0, 4 }} } -// } -// } -// class DXILResourceBindingInfo { -public: - struct BindingRange { - uint32_t LowerBound; - uint32_t UpperBound; - BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {} - }; - - struct RegisterSpace { - uint32_t Space; - SmallVector<BindingRange> FreeRanges; - RegisterSpace(uint32_t Space) : Space(Space) { - FreeRanges.emplace_back(0, UINT32_MAX); - } - // Size == -1 means unbounded array - LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size); - }; - - struct BindingSpaces { - dxil::ResourceClass RC; - llvm::SmallVector<RegisterSpace> Spaces; - BindingSpaces(dxil::ResourceClass RC) : RC(RC) {} - LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space); - }; - -private: - BindingSpaces SRVSpaces, UAVSpaces, CBufferSpaces, SamplerSpaces; - bool ImplicitBinding; - bool OverlappingBinding; + hlsl::BindingInfo Bindings; + bool HasImplicitBinding = false; + bool HasOverlappingBinding = false; // Populate the resource binding info given explicit resource binding calls // in the module. 
void populate(Module &M, DXILResourceTypeMap &DRTM); public: - DXILResourceBindingInfo() - : SRVSpaces(dxil::ResourceClass::SRV), - UAVSpaces(dxil::ResourceClass::UAV), - CBufferSpaces(dxil::ResourceClass::CBuffer), - SamplerSpaces(dxil::ResourceClass::Sampler), ImplicitBinding(false), - OverlappingBinding(false) {} - - bool hasImplicitBinding() const { return ImplicitBinding; } - void setHasImplicitBinding(bool Value) { ImplicitBinding = Value; } - bool hasOverlappingBinding() const { return OverlappingBinding; } - - BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) { - switch (RC) { - case dxil::ResourceClass::SRV: - return SRVSpaces; - case dxil::ResourceClass::UAV: - return UAVSpaces; - case dxil::ResourceClass::CBuffer: - return CBufferSpaces; - case dxil::ResourceClass::Sampler: - return SamplerSpaces; - } + bool hasImplicitBinding() const { return HasImplicitBinding; } + void setHasImplicitBinding(bool Value) { HasImplicitBinding = Value; } + bool hasOverlappingBinding() const { return HasOverlappingBinding; } + void setHasOverlappingBinding(bool Value) { HasOverlappingBinding = Value; } - llvm_unreachable("Invalid resource class"); - } - - // Size == -1 means unbounded array LLVM_ABI std::optional<uint32_t> - findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size); + findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size) { + return Bindings.findAvailableBinding(RC, Space, Size); + } friend class DXILResourceBindingAnalysis; friend class DXILResourceBindingWrapperPass; diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 498c19b..17f4112 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -164,37 +164,37 @@ public: static_cast<unsigned>(OperandKind::MaxOperandKind); Vocabulary() = default; - Vocabulary(VocabVector &&Vocab); + LLVM_ABI Vocabulary(VocabVector &&Vocab); - bool isValid() const; - unsigned getDimension() const; - size_t 
size() const; + LLVM_ABI bool isValid() const; + LLVM_ABI unsigned getDimension() const; + LLVM_ABI size_t size() const; static size_t expectedSize() { return MaxOpcodes + MaxTypeIDs + MaxOperandKinds; } /// Helper function to get vocabulary key for a given Opcode - static StringRef getVocabKeyForOpcode(unsigned Opcode); + LLVM_ABI static StringRef getVocabKeyForOpcode(unsigned Opcode); /// Helper function to get vocabulary key for a given TypeID - static StringRef getVocabKeyForTypeID(Type::TypeID TypeID); + LLVM_ABI static StringRef getVocabKeyForTypeID(Type::TypeID TypeID); /// Helper function to get vocabulary key for a given OperandKind - static StringRef getVocabKeyForOperandKind(OperandKind Kind); + LLVM_ABI static StringRef getVocabKeyForOperandKind(OperandKind Kind); /// Helper function to classify an operand into OperandKind - static OperandKind getOperandKind(const Value *Op); + LLVM_ABI static OperandKind getOperandKind(const Value *Op); /// Helpers to return the IDs of a given Opcode, TypeID, or OperandKind - static unsigned getNumericID(unsigned Opcode); - static unsigned getNumericID(Type::TypeID TypeID); - static unsigned getNumericID(const Value *Op); + LLVM_ABI static unsigned getNumericID(unsigned Opcode); + LLVM_ABI static unsigned getNumericID(Type::TypeID TypeID); + LLVM_ABI static unsigned getNumericID(const Value *Op); /// Accessors to get the embedding for a given entity. 
- const ir2vec::Embedding &operator[](unsigned Opcode) const; - const ir2vec::Embedding &operator[](Type::TypeID TypeId) const; - const ir2vec::Embedding &operator[](const Value *Arg) const; + LLVM_ABI const ir2vec::Embedding &operator[](unsigned Opcode) const; + LLVM_ABI const ir2vec::Embedding &operator[](Type::TypeID TypeId) const; + LLVM_ABI const ir2vec::Embedding &operator[](const Value *Arg) const; /// Const Iterator type aliases using const_iterator = VocabVector::const_iterator; @@ -221,13 +221,13 @@ public: /// Returns the string key for a given index position in the vocabulary. /// This is useful for debugging or printing the vocabulary. Do not use this /// for embedding generation as string based lookups are inefficient. - static StringRef getStringKey(unsigned Pos); + LLVM_ABI static StringRef getStringKey(unsigned Pos); /// Create a dummy vocabulary for testing purposes. - static VocabVector createDummyVocabForTest(unsigned Dim = 1); + LLVM_ABI static VocabVector createDummyVocabForTest(unsigned Dim = 1); - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv) const; + LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv) const; }; /// Embedder provides the interface to generate embeddings (vector diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index af6e534..92304ed 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -180,10 +180,12 @@ public: const SmallVectorImpl<Instruction *> &Instrs) const; }; - MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, + MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, + DominatorTree *DT, const Loop *L, const DenseMap<Value *, const SCEV *> &SymbolicStrides, unsigned MaxTargetVectorWidthInBits) - : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), + 
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), + SymbolicStrides(SymbolicStrides), MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} /// Register the location (instructions are given increasing numbers) @@ -288,6 +290,15 @@ public: return PointerBounds; } + DominatorTree *getDT() const { + assert(DT && "requested DT, but it is not available"); + return DT; + } + AssumptionCache *getAC() const { + assert(AC && "requested AC, but it is not available"); + return AC; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -296,6 +307,10 @@ private: /// example we might assume a unit stride for a pointer in order to prove /// that a memory access is strided and doesn't wrap. PredicatedScalarEvolution &PSE; + + AssumptionCache *AC; + DominatorTree *DT; + const Loop *InnermostLoop; /// Reference to map of pointer values to @@ -670,7 +685,7 @@ public: LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI, + DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial = false); /// Return true we can analyze the memory accesses in the loop and there are @@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap<std::pair<const SCEV *, Type *>, - std::pair<const SCEV *, const SCEV *>> *PointerBounds); + std::pair<const SCEV *, const SCEV *>> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC); class LoopAccessInfoManager { /// The cache. 
@@ -935,12 +951,13 @@ class LoopAccessInfoManager { LoopInfo &LI; TargetTransformInfo *TTI; const TargetLibraryInfo *TLI = nullptr; + AssumptionCache *AC; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, - const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} + const TargetLibraryInfo *TLI, AssumptionCache *AC) + : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {} LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h index bff7707..011d599 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h @@ -91,6 +91,10 @@ inline bind_ty<const SCEVUnknown> m_SCEVUnknown(const SCEVUnknown *&V) { return V; } +inline bind_ty<const SCEVAddExpr> m_scev_Add(const SCEVAddExpr *&V) { + return V; +} + /// Match a specified const SCEV *. struct specificscev_ty { const SCEV *Expr; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 7928835..aa4550d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1950,6 +1950,10 @@ public: const Function &F, SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const; + /// Returns true if GEP should not be used to index into vectors for this + /// target. 
+ LLVM_ABI bool allowVectorElementIndexingUsingGEP() const; + private: std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl; }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2ea87b3..abdbca0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1145,6 +1145,8 @@ public: const Function &F, SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {} + virtual bool allowVectorElementIndexingUsingGEP() const { return true; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b55c4e0..6781cd5 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -633,6 +633,9 @@ public: return true; } + /// Return true if this group is full, i.e. it has no gaps. + bool isFull() const { return getNumMembers() == getFactor(); } + private: uint32_t Factor; // Interleave Factor. 
bool Reverse; diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h index f3b5d5e..64fe216 100644 --- a/llvm/include/llvm/BinaryFormat/COFF.h +++ b/llvm/include/llvm/BinaryFormat/COFF.h @@ -694,7 +694,24 @@ enum DLLCharacteristics : unsigned { enum ExtendedDLLCharacteristics : unsigned { /// Image is CET compatible - IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT = 0x0001 + IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT = 0x0001, + /// Image is CET compatible in strict mode + IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE = 0x0002, + /// Image is CET compatible in such a way that context IP validation is + /// relaxed + IMAGE_DLL_CHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE = + 0x0004, + /// Image is CET compatible in such a way that the use of + /// dynamic APIs is restricted to processes only + IMAGE_DLL_CHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC_ONLY = 0x0008, + /// Reserved for future use. Not used by MSVC link.exe + IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_1 = 0x0010, + /// Reserved for future use. Not used by MSVC link.exe + IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_2 = 0x0020, + /// Image is CFI compatible. + IMAGE_DLL_CHARACTERISTICS_EX_FORWARD_CFI_COMPAT = 0x0040, + /// Image is hotpatch compatible. + IMAGE_DLL_CHARACTERISTICS_EX_HOTPATCH_COMPATIBLE = 0x0080, }; enum DebugType : unsigned { diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index ad35d7f..749971e 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -973,7 +973,10 @@ enum : unsigned { // SM based processor values. EF_CUDA_SM100 = 0x6400, + EF_CUDA_SM101 = 0x6500, + EF_CUDA_SM103 = 0x6700, EF_CUDA_SM120 = 0x7800, + EF_CUDA_SM121 = 0x7900, // Set when using an accelerator variant like sm_100a. 
EF_CUDA_ACCELERATORS = 0x8, diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h index 98dbe38..0c6c4d1 100644 --- a/llvm/include/llvm/BinaryFormat/SFrame.h +++ b/llvm/include/llvm/BinaryFormat/SFrame.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Endian.h" @@ -49,29 +50,27 @@ enum class ABI : uint8_t { /// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info. enum class FREType : uint8_t { - Addr1 = 0, - Addr2 = 1, - Addr4 = 2, +#define HANDLE_SFRAME_FRE_TYPE(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; /// SFrame FDE Types. Bit 4 of FuncDescEntry.Info. enum class FDEType : uint8_t { - PCInc = 0, - PCMask = 1, +#define HANDLE_SFRAME_FDE_TYPE(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; /// Speficies key used for signing return addresses. Bit 5 of /// FuncDescEntry.Info. enum class AArch64PAuthKey : uint8_t { - A = 0, - B = 1, +#define HANDLE_SFRAME_AARCH64_PAUTH_KEY(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; -/// Size of stack offsets. Bits 5-6 of FREInfo.Info. +/// Size of stack offsets. Bits 6-7 of FREInfo.Info. enum class FREOffset : uint8_t { - B1 = 0, - B2 = 1, - B4 = 2, +#define HANDLE_SFRAME_FRE_OFFSET(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; /// Stack frame base register. Bit 0 of FREInfo.Info. 
@@ -163,9 +162,13 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>; template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>; template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>; -ArrayRef<EnumEntry<Version>> getVersions(); -ArrayRef<EnumEntry<Flags>> getFlags(); -ArrayRef<EnumEntry<ABI>> getABIs(); +LLVM_ABI ArrayRef<EnumEntry<Version>> getVersions(); +LLVM_ABI ArrayRef<EnumEntry<Flags>> getFlags(); +LLVM_ABI ArrayRef<EnumEntry<ABI>> getABIs(); +LLVM_ABI ArrayRef<EnumEntry<FREType>> getFRETypes(); +LLVM_ABI ArrayRef<EnumEntry<FDEType>> getFDETypes(); +LLVM_ABI ArrayRef<EnumEntry<AArch64PAuthKey>> getAArch64PAuthKeys(); +LLVM_ABI ArrayRef<EnumEntry<FREOffset>> getFREOffsets(); } // namespace sframe } // namespace llvm diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def index 643b15f..fddd440 100644 --- a/llvm/include/llvm/BinaryFormat/SFrameConstants.def +++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// -#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \ - defined(HANDLE_SFRAME_ABI)) +#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \ + defined(HANDLE_SFRAME_ABI) || defined(HANDLE_SFRAME_FRE_TYPE) || \ + defined(HANDLE_SFRAME_FDE_TYPE) || \ + defined(HANDLE_SFRAME_AARCH64_PAUTH_KEY) || \ + defined(HANDLE_SFRAME_FRE_OFFSET)) #error "Missing HANDLE_SFRAME definition" #endif @@ -23,6 +26,22 @@ #define HANDLE_SFRAME_ABI(CODE, NAME) #endif +#ifndef HANDLE_SFRAME_FRE_TYPE +#define HANDLE_SFRAME_FRE_TYPE(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_FDE_TYPE +#define HANDLE_SFRAME_FDE_TYPE(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_AARCH64_PAUTH_KEY +#define HANDLE_SFRAME_AARCH64_PAUTH_KEY(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_FRE_OFFSET +#define 
HANDLE_SFRAME_FRE_OFFSET(CODE, NAME) +#endif + HANDLE_SFRAME_VERSION(0x01, V1) HANDLE_SFRAME_VERSION(0x02, V2) @@ -34,6 +53,24 @@ HANDLE_SFRAME_ABI(0x01, AArch64EndianBig) HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle) HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle) +HANDLE_SFRAME_FRE_TYPE(0x00, Addr1) +HANDLE_SFRAME_FRE_TYPE(0x01, Addr2) +HANDLE_SFRAME_FRE_TYPE(0x02, Addr4) + +HANDLE_SFRAME_FDE_TYPE(0, PCInc) +HANDLE_SFRAME_FDE_TYPE(1, PCMask) + +HANDLE_SFRAME_AARCH64_PAUTH_KEY(0, A) +HANDLE_SFRAME_AARCH64_PAUTH_KEY(1, B) + +HANDLE_SFRAME_FRE_OFFSET(0, B1) +HANDLE_SFRAME_FRE_OFFSET(1, B2) +HANDLE_SFRAME_FRE_OFFSET(2, B4) + #undef HANDLE_SFRAME_VERSION #undef HANDLE_SFRAME_FLAG #undef HANDLE_SFRAME_ABI +#undef HANDLE_SFRAME_FRE_TYPE +#undef HANDLE_SFRAME_FDE_TYPE +#undef HANDLE_SFRAME_AARCH64_PAUTH_KEY +#undef HANDLE_SFRAME_FRE_OFFSET diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index 78f5eb4..5f53681 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -466,7 +466,7 @@ private: EmitCode(Abbrev); - unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); + unsigned i = 0, e = Abbv->getNumOperandInfos(); if (Code) { assert(e && "Expected non-empty abbreviation"); const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); @@ -632,8 +632,7 @@ private: void EncodeAbbrev(const BitCodeAbbrev &Abbv) { EmitCode(bitc::DEFINE_ABBREV); EmitVBR(Abbv.getNumOperandInfos(), 5); - for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos()); - i != e; ++i) { + for (unsigned i = 0, e = Abbv.getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i); Emit(Op.isLiteral(), 1); if (Op.isLiteral()) { diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index faab2503..91c0142 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -190,6 
+190,36 @@ private: /// Emit comments in assembly output if this is true. bool VerboseAsm; + /// Store symbols and type identifiers used to create callgraph section + /// entries related to a function. + struct FunctionInfo { + /// Numeric type identifier used in callgraph section for indirect calls + /// and targets. + using CGTypeId = uint64_t; + + /// Enumeration of function kinds, and their mapping to function kind values + /// stored in callgraph section entries. + /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp. + enum class FunctionKind : uint64_t { + /// Function cannot be target to indirect calls. + NOT_INDIRECT_TARGET = 0, + + /// Function may be target to indirect calls but its type id is unknown. + INDIRECT_TARGET_UNKNOWN_TID = 1, + + /// Function may be target to indirect calls and its type id is known. + INDIRECT_TARGET_KNOWN_TID = 2, + }; + + /// Map type identifiers to callsite labels. Labels are generated for each + /// indirect callsite in the function. + SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels; + }; + + enum CallGraphSectionFormatVersion : uint64_t { + V_0 = 0, + }; + /// Output stream for the stack usage file (i.e., .su file). std::unique_ptr<raw_fd_ostream> StackUsageStream; @@ -355,6 +385,13 @@ public: /// are available. Returns empty string otherwise. StringRef getConstantSectionSuffix(const Constant *C) const; + /// Generate and emit labels for callees of the indirect callsites which will + /// be used to populate the .callgraph section. + void emitIndirectCalleeLabels( + FunctionInfo &FuncInfo, + const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, + const MachineInstr &MI); + //===------------------------------------------------------------------===// // XRay instrumentation implementation. 
//===------------------------------------------------------------------===// @@ -442,6 +479,8 @@ public: void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol); virtual void emitKCFITypeId(const MachineFunction &MF); + void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo); + void emitPseudoProbe(const MachineInstr &MI); void emitRemarksSection(remarks::RemarkStreamer &RS); diff --git a/llvm/include/llvm/CodeGen/GCMetadata.h b/llvm/include/llvm/CodeGen/GCMetadata.h index 33f5301..5b9ee28 100644 --- a/llvm/include/llvm/CodeGen/GCMetadata.h +++ b/llvm/include/llvm/CodeGen/GCMetadata.h @@ -101,12 +101,12 @@ private: // are live per safe point (1.5% on 64-bit hosts). public: - GCFunctionInfo(const Function &F, GCStrategy &S); - ~GCFunctionInfo(); + LLVM_ABI GCFunctionInfo(const Function &F, GCStrategy &S); + LLVM_ABI ~GCFunctionInfo(); /// Handle invalidation explicitly. - bool invalidate(Function &F, const PreservedAnalyses &PA, - FunctionAnalysisManager::Invalidator &Inv); + LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); /// getFunction - Return the function to which this metadata applies. const Function &getFunction() const { return F; } @@ -163,8 +163,8 @@ public: GCStrategyMap(GCStrategyMap &&) = default; /// Handle invalidation explicitly. - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv); + LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv); using iterator = MapT::iterator; using const_iterator = MapT::const_iterator; @@ -205,7 +205,7 @@ class CollectorMetadataAnalysis public: using Result = GCStrategyMap; - Result run(Module &M, ModuleAnalysisManager &MAM); + LLVM_ABI Result run(Module &M, ModuleAnalysisManager &MAM); }; /// An analysis pass which caches information about the Function. 
@@ -217,7 +217,7 @@ class GCFunctionAnalysis : public AnalysisInfoMixin<GCFunctionAnalysis> { public: using Result = GCFunctionInfo; - Result run(Function &F, FunctionAnalysisManager &FAM); + LLVM_ABI Result run(Function &F, FunctionAnalysisManager &FAM); }; /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or @@ -228,7 +228,7 @@ public: /// This pass requires `CollectorMetadataAnalysis`. class GCLoweringPass : public PassInfoMixin<GCLoweringPass> { public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; /// An analysis pass which caches information about the entire Module. @@ -244,7 +244,7 @@ public: /// Lookup the GCStrategy object associated with the given gc name. /// Objects are owned internally; No caller should attempt to delete the /// returned objects. - GCStrategy *getGCStrategy(const StringRef Name); + LLVM_ABI GCStrategy *getGCStrategy(const StringRef Name); /// List of per function info objects. In theory, Each of these /// may be associated with a different GC. @@ -265,14 +265,14 @@ private: public: using iterator = SmallVector<std::unique_ptr<GCStrategy>, 1>::const_iterator; - static char ID; + LLVM_ABI static char ID; - GCModuleInfo(); + LLVM_ABI GCModuleInfo(); /// clear - Resets the pass. Any pass, which uses GCModuleInfo, should /// call it in doFinalization(). /// - void clear(); + LLVM_ABI void clear(); /// begin/end - Iterators for used strategies. /// @@ -282,7 +282,7 @@ public: /// get - Look up function metadata. This is currently assumed /// have the side effect of initializing the associated GCStrategy. That /// will soon change. 
- GCFunctionInfo &getFunctionInfo(const Function &F); + LLVM_ABI GCFunctionInfo &getFunctionInfo(const Function &F); }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 571ec6d..4292c0b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -26,9 +26,9 @@ namespace llvm { /// A base class for all GenericMachineInstrs. class GenericMachineInstr : public MachineInstr { - constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap | - IsExact | Disjoint | NonNeg | - FmNoNans | FmNoInfs | SameSign; + constexpr static unsigned PoisonFlags = + NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint | NonNeg | FmNoNans | + FmNoInfs | SameSign | InBounds; public: GenericMachineInstr() = delete; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 756c0b2..99d3cd0 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -518,6 +518,21 @@ public: const SrcOp &Op1, std::optional<unsigned> Flags = std::nullopt); + /// Build and insert an instruction with appropriate flags for addressing some + /// offset of an object, i.e.: \p Res = nuw inbounds G_PTR_ADD \p Op0, \p Op1 + /// The value of \p Op0 must be a pointer into or just after an object, adding + /// the value of \p Op1 to it must yield to a pointer into or just after the + /// same object. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res and \p Op0 must be generic virtual registers with pointer + /// type. + /// \pre \p Op1 must be a generic virtual register with scalar type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildObjectPtrOffset(const DstOp &Res, const SrcOp &Op0, + const SrcOp &Op1); + /// Materialize and insert \p Res = G_PTR_ADD \p Op0, (G_CONSTANT \p Value) /// /// G_PTR_ADD adds \p Value bytes to the pointer specified by \p Op0, @@ -534,10 +549,29 @@ public: /// type as \p Op0 or \p Op0 itself. /// /// \return a MachineInstrBuilder for the newly created instruction. - std::optional<MachineInstrBuilder> materializePtrAdd(Register &Res, - Register Op0, - const LLT ValueTy, - uint64_t Value); + std::optional<MachineInstrBuilder> + materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, + uint64_t Value, + std::optional<unsigned> Flags = std::nullopt); + + /// Materialize and insert an instruction with appropriate flags for + /// addressing some offset of an object, i.e.: + /// \p Res = nuw inbounds G_PTR_ADD \p Op0, (G_CONSTANT \p Value) + /// The value of \p Op0 must be a pointer into or just after an object, adding + /// \p Value to it must yield to a pointer into or just after the same object. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Op0 must be a generic virtual register with pointer type. + /// \pre \p ValueTy must be a scalar type. + /// \pre \p Res must be 0. This is to detect confusion between + /// materializeObjectPtrOffset() and buildObjectPtrOffset(). + /// \post \p Res will either be a new generic virtual register of the same + /// type as \p Op0 or \p Op0 itself. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ std::optional<MachineInstrBuilder> + materializeObjectPtrOffset(Register &Res, Register Op0, const LLT ValueTy, + uint64_t Value); /// Build and insert \p Res = G_PTRMASK \p Op0, \p Op1 MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0, diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 938d71d..9e3d919 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -323,10 +323,11 @@ public: const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - /// Returns true if the original IR terminator is an `indirectbr`. This - /// typically corresponds to a `goto` in C, rather than jump tables. - bool terminatorIsComputedGoto() const { - return back().isIndirectBranch() && + /// Returns true if the original IR terminator is an `indirectbr` with + /// successor blocks. This typically corresponds to a `goto` in C, rather than + /// jump tables. + bool terminatorIsComputedGotoWithSuccessors() const { + return back().isIndirectBranch() && !succ_empty() && llvm::all_of(successors(), [](const MachineBasicBlock *Succ) { return Succ->isIRBlockAddressTaken(); }); diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 7f88323..06c4daf 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -517,6 +517,13 @@ public: SmallVector<ArgRegPair, 1> ArgRegPairs; /// Callee type ids. SmallVector<ConstantInt *, 4> CalleeTypeIds; + + CallSiteInfo() = default; + + /// Extracts the numeric type id from the CallBase's callee_type Metadata, + /// and sets CalleeTypeIds. This is used as type id for the indirect call in + /// the call graph section. 
+ CallSiteInfo(const CallBase &CB); }; struct CalledGlobalInfo { @@ -524,11 +531,12 @@ public: unsigned TargetFlags; }; + using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>; + private: Delegate *TheDelegate = nullptr; GISelChangeObserver *Observer = nullptr; - using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>; /// Map a call instruction to call site arguments forwarding info. CallSiteInfoMap CallSitesInfo; diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h index 1d954cf..1982ac6 100644 --- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -48,7 +48,7 @@ public: class FreeMachineFunctionPass : public PassInfoMixin<FreeMachineFunctionPass> { public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 94d04b8..10a9b1f 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -122,7 +122,9 @@ public: Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs. NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. - SameSign = 1 << 21 // Both operands have the same sign. + SameSign = 1 << 21, // Both operands have the same sign. + InBounds = 1 << 22 // Pointer arithmetic remains inbounds. + // Implies NoUSWrap. }; private: diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index e63e77a..e705d7d9 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -69,6 +69,32 @@ enum { } // end namespace RegState +/// Set of metadata that should be preserved when using BuildMI(). 
This provides +/// a more convenient way of preserving DebugLoc, PCSections and MMRA. +class MIMetadata { +public: + MIMetadata() = default; + MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr) + : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {} + MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr, + MDNode *MMRA = nullptr) + : DL(DI), PCSections(PCSections), MMRA(MMRA) {} + explicit MIMetadata(const Instruction &From) + : DL(From.getDebugLoc()), + PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} + explicit MIMetadata(const MachineInstr &From) + : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} + + const DebugLoc &getDL() const { return DL; } + MDNode *getPCSections() const { return PCSections; } + MDNode *getMMRAMetadata() const { return MMRA; } + +private: + DebugLoc DL; + MDNode *PCSections = nullptr; + MDNode *MMRA = nullptr; +}; + class MachineInstrBuilder { MachineFunction *MF = nullptr; MachineInstr *MI = nullptr; @@ -317,15 +343,11 @@ public: } } - const MachineInstrBuilder &setPCSections(MDNode *MD) const { - if (MD) - MI->setPCSections(*MF, MD); - return *this; - } - - const MachineInstrBuilder &setMMRAMetadata(MDNode *MMRA) const { - if (MMRA) - MI->setMMRAMetadata(*MF, MMRA); + const MachineInstrBuilder &copyMIMetadata(const MIMetadata &MIMD) const { + if (MIMD.getPCSections()) + MI->setPCSections(*MF, MIMD.getPCSections()); + if (MIMD.getMMRAMetadata()) + MI->setMMRAMetadata(*MF, MIMD.getMMRAMetadata()); return *this; } @@ -343,38 +365,11 @@ public: } }; -/// Set of metadata that should be preserved when using BuildMI(). This provides -/// a more convenient way of preserving DebugLoc, PCSections and MMRA. 
-class MIMetadata { -public: - MIMetadata() = default; - MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr) - : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {} - MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr, - MDNode *MMRA = nullptr) - : DL(DI), PCSections(PCSections), MMRA(MMRA) {} - explicit MIMetadata(const Instruction &From) - : DL(From.getDebugLoc()), - PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} - explicit MIMetadata(const MachineInstr &From) - : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} - - const DebugLoc &getDL() const { return DL; } - MDNode *getPCSections() const { return PCSections; } - MDNode *getMMRAMetadata() const { return MMRA; } - -private: - DebugLoc DL; - MDNode *PCSections = nullptr; - MDNode *MMRA = nullptr; -}; - /// Builder interface. Specify how to create the initial instruction itself. inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + .copyMIMetadata(MIMD); } /// This version of the builder sets up the first operand as a @@ -382,8 +377,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) + .copyMIMetadata(MIMD) .addReg(DestReg, RegState::Define); } @@ -397,10 +391,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - 
.setMMRAMetadata(MIMD.getMMRAMetadata()) - .addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg( + DestReg, RegState::Define); } /// This version of the builder inserts the newly-built instruction before @@ -416,10 +408,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) - .addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg( + DestReg, RegState::Define); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, @@ -449,9 +439,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -461,9 +449,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, diff --git a/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/llvm/include/llvm/CodeGen/MachineInstrBundle.h index ebf7534..65eb5c4 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBundle.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBundle.h @@ -297,8 +297,8 @@ LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr 
&MI, class FinalizeBundleTestPass : public PassInfoMixin<FinalizeBundleTestPass> { public: - PreservedAnalyses run(MachineFunction &MF, - MachineFunctionAnalysisManager &MFAM); + LLVM_ABI PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); }; } // End llvm namespace diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index efda7eb..5a2aee2 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1303,8 +1303,8 @@ protected: SchedBoundary Top; SchedBoundary Bot; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; /// Candidate last picked from Top boundary. SchedCandidate TopCand; @@ -1346,8 +1346,8 @@ protected: /// Candidate last picked from Bot boundary. SchedCandidate BotCand; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; public: PostGenericScheduler(const MachineSchedContext *C) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 2967532..be90250 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -578,6 +578,18 @@ m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) { return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx); } +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCC(const LTy &L, const RTy &R, const TTy &T, const FTy &F, + const CCTy &CC) { + return m_Node(ISD::SELECT_CC, L, R, T, F, CC); +} + +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, + const FTy &F, const CCTy &CC) { + return m_AnyOf(m_Select(m_SetCC(L, R, CC), T, F), m_SelectCC(L, R, T, F, CC)); +} + // === Binary operations === template 
<typename LHS_P, typename RHS_P, bool Commutable = false, bool ExcludeChain = false> diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index 3a0a31b..122b7be 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -240,6 +240,11 @@ class TargetRegisterInfo; typedef SmallSet<SUnit *, 8> ClusterInfo; constexpr unsigned InvalidClusterId = ~0u; + /// Return whether the input cluster ID's are the same and valid. + inline bool isTheSameCluster(unsigned A, unsigned B) { + return A != InvalidClusterId && A == B; + } + /// Scheduling unit. This is a node in the scheduling DAG. class SUnit { private: diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 8f88811..11ae8cd 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -182,7 +182,7 @@ public: return SDValue(Node, R); } - /// Return true if this node is an operand of N. + /// Return true if the referenced return value is an operand of N. LLVM_ABI bool isOperandOf(const SDNode *N) const; /// Return the ValueType of the referenced return value. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h index 2834331..b865e02 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h @@ -17,6 +17,7 @@ #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/LazyReexports.h" +#include "llvm/Support/Compiler.h" #include <mutex> @@ -33,7 +34,7 @@ class EPCIndirectionUtils { public: /// ABI support base class. Used to write resolver, stub, and trampoline /// blocks. 
- class ABISupport { + class LLVM_ABI ABISupport { protected: ABISupport(unsigned PointerSize, unsigned TrampolineSize, unsigned StubSize, unsigned StubToPointerMaxDisplacement, unsigned ResolverCodeSize) @@ -81,7 +82,7 @@ public: CreateWithABI(ExecutorProcessControl &EPC); /// Create based on the ExecutorProcessControl triple. - static Expected<std::unique_ptr<EPCIndirectionUtils>> + LLVM_ABI static Expected<std::unique_ptr<EPCIndirectionUtils>> Create(ExecutorProcessControl &EPC); /// Create based on the ExecutorProcessControl triple. @@ -98,27 +99,27 @@ public: /// Release memory for resources held by this instance. This *must* be called /// prior to destruction of the class. - Error cleanup(); + LLVM_ABI Error cleanup(); /// Write resolver code to the executor process and return its address. /// This must be called before any call to createTrampolinePool or /// createLazyCallThroughManager. - Expected<ExecutorAddr> writeResolverBlock(ExecutorAddr ReentryFnAddr, - ExecutorAddr ReentryCtxAddr); + LLVM_ABI Expected<ExecutorAddr> + writeResolverBlock(ExecutorAddr ReentryFnAddr, ExecutorAddr ReentryCtxAddr); /// Returns the address of the Resolver block. Returns zero if the /// writeResolverBlock method has not previously been called. ExecutorAddr getResolverBlockAddress() const { return ResolverBlockAddr; } /// Create an IndirectStubsManager for the executor process. - std::unique_ptr<IndirectStubsManager> createIndirectStubsManager(); + LLVM_ABI std::unique_ptr<IndirectStubsManager> createIndirectStubsManager(); /// Create a TrampolinePool for the executor process. - TrampolinePool &getTrampolinePool(); + LLVM_ABI TrampolinePool &getTrampolinePool(); /// Create a LazyCallThroughManager. /// This function should only be called once. - LazyCallThroughManager & + LLVM_ABI LazyCallThroughManager & createLazyCallThroughManager(ExecutionSession &ES, ExecutorAddr ErrorHandlerAddr); @@ -170,7 +171,7 @@ private: /// called. 
/// /// This function is experimental and likely subject to revision. -Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU); +LLVM_ABI Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU); namespace detail { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h b/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h index c92719e..a9f5c45 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Speculation.h" +#include "llvm/Support/Compiler.h" namespace llvm { @@ -24,8 +25,8 @@ namespace orc { // Provides common code. class SpeculateQuery { protected: - void findCalles(const BasicBlock *, DenseSet<StringRef> &); - bool isStraightLine(const Function &F); + LLVM_ABI void findCalles(const BasicBlock *, DenseSet<StringRef> &); + LLVM_ABI bool isStraightLine(const Function &F); public: using ResultTy = std::optional<DenseMap<StringRef, DenseSet<StringRef>>>; @@ -37,7 +38,7 @@ class BlockFreqQuery : public SpeculateQuery { public: // Find likely next executables based on IR Block Frequency - ResultTy operator()(Function &F); + LLVM_ABI ResultTy operator()(Function &F); }; // This Query generates a sequence of basic blocks which follows the order of @@ -73,7 +74,7 @@ private: VisitedBlocksInfoTy &); public: - ResultTy operator()(Function &F); + LLVM_ABI ResultTy operator()(Function &F); }; } // namespace orc diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h new file mode 100644 index 0000000..70a2eeb --- /dev/null +++ b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h @@ -0,0 +1,162 @@ +//===- HLSLBinding.h - Representation for resource bindings in HLSL -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains objects to represent resource bindings. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FRONTEND_HLSL_HLSLBINDING_H +#define LLVM_FRONTEND_HLSL_HLSLBINDING_H + +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DXILABI.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace hlsl { + +/// BindingInfo represents the ranges of bindings and free space for each +/// `dxil::ResourceClass`. This can represent HLSL-level bindings as well as +/// bindings described in root signatures, and can be used for analysis of +/// overlapping or missing bindings as well as for finding space for implicit +/// bindings. +/// +/// As an example, given these resource bindings: +/// +/// RWBuffer<float> A[10] : register(u3); +/// RWBuffer<float> B[] : register(u5, space2) +/// +/// The binding info for UAV bindings should look like this: +/// +/// UAVSpaces { +/// ResClass = ResourceClass::UAV, +/// Spaces = { +/// { Space = 0u, FreeRanges = {{ 0u, 2u }, { 13u, ~0u }} }, +/// { Space = 2u, FreeRanges = {{ 0u, 4u }} } +/// } +/// } +class BindingInfo { +public: + struct BindingRange { + uint32_t LowerBound; + uint32_t UpperBound; + BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {} + }; + + struct RegisterSpace { + uint32_t Space; + SmallVector<BindingRange> FreeRanges; + RegisterSpace(uint32_t Space) : Space(Space) { + FreeRanges.emplace_back(0, ~0u); + } + // Size == -1 means unbounded array + LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size); + }; + + struct BindingSpaces { + dxil::ResourceClass RC; + llvm::SmallVector<RegisterSpace> Spaces; + 
BindingSpaces(dxil::ResourceClass RC) : RC(RC) {} + LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space); + }; + +private: + BindingSpaces SRVSpaces{dxil::ResourceClass::SRV}; + BindingSpaces UAVSpaces{dxil::ResourceClass::UAV}; + BindingSpaces CBufferSpaces{dxil::ResourceClass::CBuffer}; + BindingSpaces SamplerSpaces{dxil::ResourceClass::Sampler}; + +public: + BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: + return SRVSpaces; + case dxil::ResourceClass::UAV: + return UAVSpaces; + case dxil::ResourceClass::CBuffer: + return CBufferSpaces; + case dxil::ResourceClass::Sampler: + return SamplerSpaces; + } + + llvm_unreachable("Invalid resource class"); + } + const BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) const { + return const_cast<BindingInfo *>(this)->getBindingSpaces(RC); + } + + // Size == -1 means unbounded array + LLVM_ABI std::optional<uint32_t> + findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size); + + friend class BindingInfoBuilder; +}; + +/// Builder class for creating a \c BindingInfo. 
+class BindingInfoBuilder { +public: + struct Binding { + dxil::ResourceClass RC; + uint32_t Space; + uint32_t LowerBound; + uint32_t UpperBound; + const void *Cookie; + + Binding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound, + uint32_t UpperBound, const void *Cookie) + : RC(RC), Space(Space), LowerBound(LowerBound), UpperBound(UpperBound), + Cookie(Cookie) {} + + bool isUnbounded() const { return UpperBound == ~0U; } + + bool operator==(const Binding &RHS) const { + return std::tie(RC, Space, LowerBound, UpperBound, Cookie) == + std::tie(RHS.RC, RHS.Space, RHS.LowerBound, RHS.UpperBound, + RHS.Cookie); + } + bool operator!=(const Binding &RHS) const { return !(*this == RHS); } + + bool operator<(const Binding &RHS) const { + return std::tie(RC, Space, LowerBound) < + std::tie(RHS.RC, RHS.Space, RHS.LowerBound); + } + }; + +private: + SmallVector<Binding> Bindings; + +public: + void trackBinding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound, + uint32_t UpperBound, const void *Cookie) { + Bindings.emplace_back(RC, Space, LowerBound, UpperBound, Cookie); + } + /// Calculate the binding info - \c ReportOverlap will be called once for each + /// overlapping binding. + BindingInfo calculateBindingInfo( + llvm::function_ref<void(const BindingInfoBuilder &Builder, + const Binding &Overlapping)> + ReportOverlap); + + /// Calculate the binding info - \c HasOverlap will be set to indicate whether + /// there are any overlapping bindings. + BindingInfo calculateBindingInfo(bool &HasOverlap) { + HasOverlap = false; + return calculateBindingInfo( + [&HasOverlap](auto, auto) { HasOverlap = true; }); + } + + /// For use in the \c ReportOverlap callback of \c calculateBindingInfo - + /// finds a binding that the \c ReportedBinding overlaps with. 
+ const Binding &findOverlapping(const Binding &ReportedBinding) const; +}; + +} // namespace hlsl +} // namespace llvm + +#endif // LLVM_FRONTEND_HLSL_HLSLBINDING_H diff --git a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h index 6fa51ed..0bd0774 100644 --- a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h +++ b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h @@ -14,6 +14,7 @@ #ifndef LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H #define LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H +#include "llvm/ADT/StringRef.h" #include "llvm/Frontend/HLSL/HLSLRootSignature.h" #include "llvm/IR/Constants.h" #include "llvm/MC/DXContainerRootSignature.h" @@ -26,6 +27,80 @@ class Metadata; namespace hlsl { namespace rootsig { +template <typename T> +class RootSignatureValidationError + : public ErrorInfo<RootSignatureValidationError<T>> { +public: + static char ID; + StringRef ParamName; + T Value; + + RootSignatureValidationError(StringRef ParamName, T Value) + : ParamName(ParamName), Value(Value) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid value for " << ParamName << ": " << Value; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class GenericRSMetadataError : public ErrorInfo<GenericRSMetadataError> { +public: + static char ID; + StringRef Message; + MDNode *MD; + + GenericRSMetadataError(StringRef Message, MDNode *MD) + : Message(Message), MD(MD) {} + + void log(raw_ostream &OS) const override { + OS << Message; + if (MD) { + OS << "\n"; + MD->printTree(OS); + } + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class InvalidRSMetadataFormat : public ErrorInfo<InvalidRSMetadataFormat> { +public: + static char ID; + StringRef ElementName; + + InvalidRSMetadataFormat(StringRef ElementName) : ElementName(ElementName) {} + + void log(raw_ostream &OS) 
const override { + OS << "Invalid format for " << ElementName; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class InvalidRSMetadataValue : public ErrorInfo<InvalidRSMetadataValue> { +public: + static char ID; + StringRef ParamName; + + InvalidRSMetadataValue(StringRef ParamName) : ParamName(ParamName) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid value for " << ParamName; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + class MetadataBuilder { public: MetadataBuilder(llvm::LLVMContext &Ctx, ArrayRef<RootElement> Elements) @@ -66,29 +141,27 @@ class MetadataParser { public: MetadataParser(MDNode *Root) : Root(Root) {} - LLVM_ABI bool ParseRootSignature(LLVMContext *Ctx, - mcdxbc::RootSignatureDesc &RSD); + LLVM_ABI llvm::Expected<llvm::mcdxbc::RootSignatureDesc> + ParseRootSignature(uint32_t Version); private: - bool parseRootFlags(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootFlagNode); - bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootConstantNode); - bool parseRootDescriptors(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootDescriptorNode, - RootSignatureElementKind ElementKind); - bool parseDescriptorRange(LLVMContext *Ctx, mcdxbc::DescriptorTable &Table, - MDNode *RangeDescriptorNode); - bool parseDescriptorTable(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *DescriptorTableNode); - bool parseRootSignatureElement(LLVMContext *Ctx, - mcdxbc::RootSignatureDesc &RSD, - MDNode *Element); - bool parseStaticSampler(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *StaticSamplerNode); - - bool validateRootSignature(LLVMContext *Ctx, - const llvm::mcdxbc::RootSignatureDesc &RSD); + llvm::Error parseRootFlags(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootFlagNode); + llvm::Error parseRootConstants(mcdxbc::RootSignatureDesc 
&RSD, + MDNode *RootConstantNode); + llvm::Error parseRootDescriptors(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootDescriptorNode, + RootSignatureElementKind ElementKind); + llvm::Error parseDescriptorRange(mcdxbc::DescriptorTable &Table, + MDNode *RangeDescriptorNode); + llvm::Error parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD, + MDNode *DescriptorTableNode); + llvm::Error parseRootSignatureElement(mcdxbc::RootSignatureDesc &RSD, + MDNode *Element); + llvm::Error parseStaticSampler(mcdxbc::RootSignatureDesc &RSD, + MDNode *StaticSamplerNode); + + llvm::Error validateRootSignature(const llvm::mcdxbc::RootSignatureDesc &RSD); MDNode *Root; }; diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 7919f7a..ce1cedc 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -578,8 +578,9 @@ struct DynamicAllocatorsT { template <typename T, typename I, typename E> // struct EnterT { using List = ObjectListT<I, E>; - using WrapperTrait = std::true_type; - List v; + ENUM(Modifier, Automap); + using TupleTrait = std::true_type; + std::tuple<OPT(Modifier), List> t; }; // V5.2: [5.6.2] `exclusive` clause diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 1b94657..79f25bb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -179,7 +179,7 @@ def OMPC_DynamicAllocators : Clause<[Spelling<"dynamic_allocators">]> { let clangClass = "OMPDynamicAllocatorsClause"; } def OMPC_Enter : Clause<[Spelling<"enter">]> { - let flangClass = "OmpObjectList"; + let flangClass = "OmpEnterClause"; } def OMPC_Exclusive : Clause<[Spelling<"exclusive">]> { let clangClass = "OMPExclusiveClause"; @@ -1460,7 +1460,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> { ]; let allowedOnceClauses = [ VersionedClause<OMPC_Collapse>, - VersionedClause<OMPC_If>, + VersionedClause<OMPC_If, 50>, 
VersionedClause<OMPC_NoWait>, VersionedClause<OMPC_Order, 50>, VersionedClause<OMPC_Ordered>, diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h index 6b81355..44e46e4 100644 --- a/llvm/include/llvm/IR/GCStrategy.h +++ b/llvm/include/llvm/IR/GCStrategy.h @@ -47,6 +47,7 @@ #ifndef LLVM_IR_GCSTRATEGY_H #define LLVM_IR_GCSTRATEGY_H +#include "llvm/Support/Compiler.h" #include "llvm/Support/Registry.h" #include <optional> #include <string> @@ -81,7 +82,7 @@ protected: bool UsesMetadata = false; ///< If set, backend must emit metadata tables. public: - GCStrategy(); + LLVM_ABI GCStrategy(); virtual ~GCStrategy() = default; /// Return the name of the GC strategy. This is the value of the collector @@ -145,7 +146,7 @@ using GCRegistry = Registry<GCStrategy>; extern template class LLVM_TEMPLATE_ABI Registry<GCStrategy>; /// Lookup the GCStrategy object associated with the given gc name. -std::unique_ptr<GCStrategy> getGCStrategy(const StringRef Name); +LLVM_ABI std::unique_ptr<GCStrategy> getGCStrategy(const StringRef Name); } // end namespace llvm diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 0318427..2e13896 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -996,14 +996,6 @@ public: return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); } - /// FIXME: Remove this function once transition to Align is over. - /// Use getDestAlign() instead. - LLVM_DEPRECATED("Use getDestAlign() instead", "getDestAlign") - unsigned getDestAlignment() const { - if (auto MA = getParamAlign(ARG_DEST)) - return MA->value(); - return 0; - } MaybeAlign getDestAlign() const { return getParamAlign(ARG_DEST); } /// Set the specified arguments of the instruction. @@ -1057,15 +1049,6 @@ public: return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); } - /// FIXME: Remove this function once transition to Align is over. 
- /// Use getSourceAlign() instead. - LLVM_DEPRECATED("Use getSourceAlign() instead", "getSourceAlign") - unsigned getSourceAlignment() const { - if (auto MA = BaseCL::getParamAlign(ARG_SOURCE)) - return MA->value(); - return 0; - } - MaybeAlign getSourceAlign() const { return BaseCL::getParamAlign(ARG_SOURCE); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 3a7db6d..7265a76 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -593,6 +593,14 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">; + +def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic< + [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">; + def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; @@ -601,18 +609,57 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; -class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pk_fp8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">; + +def int_amdgcn_cvt_pk_bf8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">; + +// 
llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. +def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">; + +// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. +def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; + +// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..7] +class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; -class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; +def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">; +def int_amdgcn_cvt_scale_pk8_bf16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_fp8">; +def int_amdgcn_cvt_scale_pk8_f16_bf8 : 
AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_bf8">; +def int_amdgcn_cvt_scale_pk8_bf16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_bf8">; +def int_amdgcn_cvt_scale_pk8_f16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_i32_ty, "cvt_scale_pk8_f16_fp4">; +def int_amdgcn_cvt_scale_pk8_bf16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_i32_ty, "cvt_scale_pk8_bf16_fp4">; +def int_amdgcn_cvt_scale_pk8_f32_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_fp8">; +def int_amdgcn_cvt_scale_pk8_f32_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_bf8">; +def int_amdgcn_cvt_scale_pk8_f32_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_i32_ty, "cvt_scale_pk8_f32_fp4">; + +class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_"#name>; + def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">; def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_bf6_f16">; def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">; @@ -3473,6 +3520,12 @@ def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel +def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; + // llvm.amdgcn.cvt.sr.bf8.f32 int 
vdst, float srcA, int srcB, int old, imm byte_sel [0..3] // byte_sel selects byte to write into vdst. def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, @@ -3486,6 +3539,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.sr.fp8.f32.e5m3 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32_e5m3">, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; + // llvm.amdgcn.cvt.off.fp32.i4 int srcA def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">, DefaultAttrsIntrinsic<[llvm_float_ty], @@ -3643,6 +3702,50 @@ def int_amdgcn_fdiv_fast : DefaultAttrsIntrinsic< [IntrNoMem, IntrSpeculatable] >; +class AMDGPUAsyncGlobalLoadToLDS : Intrinsic < + [], + [global_ptr_ty, // Base global pointer to load from + local_ptr_ty, // LDS base pointer to store to. 
+ llvm_i32_ty, // offset + llvm_i32_ty], // gfx12+ cachepolicy: + // bits [0-2] = th + // bits [3-4] = scope + [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<0>>, + NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree], + "", [SDNPMemOperand] +>; + +class AMDGPUAsyncGlobalStoreFromLDS : Intrinsic < + [], + [global_ptr_ty, // Base global pointer to store to + local_ptr_ty, // LDS base pointer to load from + llvm_i32_ty, // offset + llvm_i32_ty], // gfx12+ cachepolicy: + // bits [0-2] = th + // bits [3-4] = scope + [IntrInaccessibleMemOrArgMemOnly, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, NoCapture<ArgIndex<0>>, + NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree], + "", [SDNPMemOperand] +>; + +def int_amdgcn_global_load_async_to_lds_b8 : + ClangBuiltin<"__builtin_amdgcn_global_load_async_to_lds_b8">, AMDGPUAsyncGlobalLoadToLDS; +def int_amdgcn_global_load_async_to_lds_b32 : + ClangBuiltin<"__builtin_amdgcn_global_load_async_to_lds_b32">, AMDGPUAsyncGlobalLoadToLDS; +def int_amdgcn_global_load_async_to_lds_b64 : + ClangBuiltin<"__builtin_amdgcn_global_load_async_to_lds_b64">, AMDGPUAsyncGlobalLoadToLDS; +def int_amdgcn_global_load_async_to_lds_b128 : + ClangBuiltin<"__builtin_amdgcn_global_load_async_to_lds_b128">, AMDGPUAsyncGlobalLoadToLDS; + +def int_amdgcn_global_store_async_from_lds_b8 : + ClangBuiltin<"__builtin_amdgcn_global_store_async_from_lds_b8">, AMDGPUAsyncGlobalStoreFromLDS; +def int_amdgcn_global_store_async_from_lds_b32 : + ClangBuiltin<"__builtin_amdgcn_global_store_async_from_lds_b32">, AMDGPUAsyncGlobalStoreFromLDS; +def int_amdgcn_global_store_async_from_lds_b64 : + ClangBuiltin<"__builtin_amdgcn_global_store_async_from_lds_b64">, AMDGPUAsyncGlobalStoreFromLDS; +def int_amdgcn_global_store_async_from_lds_b128 : + 
ClangBuiltin<"__builtin_amdgcn_global_store_async_from_lds_b128">, AMDGPUAsyncGlobalStoreFromLDS; + // WMMA intrinsics. class AMDGPUWmmaIntrinsicModsAB<LLVMType AB, LLVMType CD> : Intrinsic< diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index e63a41f..99f975f 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1717,6 +1717,16 @@ let TargetPrefix = "riscv" in { llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (pointer, offset, mask, vl) + def int_riscv_sseg # nf # _load_mask + : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [llvm_anyptr_ty, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (<stored values>..., pointer, mask, vl) def int_riscv_seg # nf # _store_mask : DefaultAttrsIntrinsic<[], diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index 0fd5de3..11bfd73 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -408,7 +408,7 @@ inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) { llvm_unreachable("Checking rounding mode for invalid rcp intrinsic"); } -inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) { +inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) { if (ShouldFTZ) return DenormalMode::getPreserveSign(); return DenormalMode::getIEEE(); diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 89ad4e5..eb882c4 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -134,7 +134,7 @@ struct RuntimeLibcallsInfo { /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. 
- RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const; + LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const; private: static const RTLIB::LibcallImpl diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index d8e632b..323c478 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -542,21 +542,23 @@ private: ArrayRef<SymbolResolution> Res, unsigned Partition, bool InSummary); - // These functions take a range of symbol resolutions [ResI, ResE) and consume - // the resolutions used by a single input module by incrementing ResI. After - // these functions return, [ResI, ResE) will refer to the resolution range for - // the remaining modules in the InputFile. - Error addModule(InputFile &Input, unsigned ModI, - const SymbolResolution *&ResI, const SymbolResolution *ResE); - - Expected<RegularLTOState::AddedModule> - addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + // These functions take a range of symbol resolutions and consume the + // resolutions used by a single input module. Functions return ranges referring + // to the resolutions for the remaining modules in the InputFile. 
+ Expected<ArrayRef<SymbolResolution>> + addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes, + unsigned ModI, ArrayRef<SymbolResolution> Res); + + Expected<std::pair<RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>> + addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes, + BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res); Error linkRegularLTO(RegularLTOState::AddedModule Mod, bool LivenessFromIndex); - Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + Expected<ArrayRef<SymbolResolution>> + addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res); Error runRegularLTO(AddStreamFn AddStream); Error runThinLTO(AddStreamFn AddStream, FileCache Cache, diff --git a/llvm/include/llvm/MC/DXContainerRootSignature.h b/llvm/include/llvm/MC/DXContainerRootSignature.h index 14a2429..3c7c886 100644 --- a/llvm/include/llvm/MC/DXContainerRootSignature.h +++ b/llvm/include/llvm/MC/DXContainerRootSignature.h @@ -10,6 +10,7 @@ #define LLVM_MC_DXCONTAINERROOTSIGNATURE_H #include "llvm/BinaryFormat/DXContainer.h" +#include "llvm/Support/Compiler.h" #include <cstdint> #include <limits> @@ -113,9 +114,9 @@ struct RootSignatureDesc { mcdxbc::RootParametersContainer ParametersContainer; SmallVector<dxbc::RTS0::v1::StaticSampler> StaticSamplers; - void write(raw_ostream &OS) const; + LLVM_ABI void write(raw_ostream &OS) const; - size_t getSize() const; + LLVM_ABI size_t getSize() const; }; } // namespace mcdxbc } // namespace llvm diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index 467ad4e..4853701 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -209,7 +209,7 @@ public: LLVM_ABI bool registerSection(MCSection &Section); LLVM_ABI bool registerSymbol(const MCSymbol &Symbol); - void addRelocDirective(RelocDirective 
RD); + LLVM_ABI void addRelocDirective(RelocDirective RD); LLVM_ABI void reportError(SMLoc L, const Twine &Msg) const; // Record pending errors during layout iteration, as they may go away once the diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 5ce58ae..d69560c 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -69,6 +69,9 @@ protected: /// Language Specific Data Area information is emitted to. MCSection *LSDASection = nullptr; + /// Section containing call graph metadata. + MCSection *CallGraphSection = nullptr; + /// If exception handling is supported by the target and the target can /// support a compact representation of the CIE and FDE, this is the section /// to emit them into. @@ -359,6 +362,8 @@ public: MCSection *getFaultMapSection() const { return FaultMapSection; } MCSection *getRemarksSection() const { return RemarksSection; } + MCSection *getCallGraphSection(const MCSection &TextSec) const; + MCSection *getStackSizesSection(const MCSection &TextSec) const; MCSection *getBBAddrMapSection(const MCSection &TextSec) const; diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index 5ac7aba..4b43a8f 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -52,10 +52,6 @@ class MCObjectStreamer : public MCStreamer { DenseMap<const MCSymbol *, SmallVector<PendingAssignment, 1>> pendingAssignments; - SmallVector<std::unique_ptr<char[]>, 0> FragStorage; - // Available bytes in the current block for trailing data or new fragments. - size_t FragSpace = 0; - void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &); void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; @@ -88,18 +84,11 @@ public: // Add a fragment with a variable-size tail and start a new empty fragment. 
void insert(MCFragment *F); - char *getCurFragEnd() const { - return reinterpret_cast<char *>(CurFrag + 1) + CurFrag->getFixedSize(); - } - MCFragment *allocFragSpace(size_t Headroom); // Add a new fragment to the current section without a variable-size tail. void newFragment(); - void ensureHeadroom(size_t Headroom); void appendContents(ArrayRef<char> Contents); void appendContents(size_t Num, char Elt); - // Add a fixup to the current fragment. Call ensureHeadroom beforehand to - // ensure the fixup and appended content apply to the same fragment. void addFixup(const MCExpr *Value, MCFixupKind Kind); void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 3bd986f..df8f617b 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -93,7 +93,8 @@ protected: // Track content and fixups for the fixed-size part as fragments are // appended to the section. The content remains immutable, except when // modified by applyFixup. - uint32_t FixedSize = 0; + uint32_t ContentStart = 0; + uint32_t ContentEnd = 0; uint32_t FixupStart = 0; uint32_t FixupEnd = 0; @@ -187,18 +188,30 @@ public: //== Content-related functions manage parent's storage using ContentStart and // ContentSize. + // Get a SmallVector reference. The caller should call doneAppending to update + // `ContentEnd`. 
+ SmallVectorImpl<char> &getContentsForAppending(); + void doneAppending(); + void appendContents(ArrayRef<char> Contents) { + getContentsForAppending().append(Contents.begin(), Contents.end()); + doneAppending(); + } + void appendContents(size_t Num, char Elt) { + getContentsForAppending().append(Num, Elt); + doneAppending(); + } MutableArrayRef<char> getContents(); ArrayRef<char> getContents() const; - void setVarContents(ArrayRef<char> Contents); + LLVM_ABI void setVarContents(ArrayRef<char> Contents); void clearVarContents() { setVarContents({}); } MutableArrayRef<char> getVarContents(); ArrayRef<char> getVarContents() const; - size_t getFixedSize() const { return FixedSize; } + size_t getFixedSize() const { return ContentEnd - ContentStart; } size_t getVarSize() const { return VarContentEnd - VarContentStart; } size_t getSize() const { - return FixedSize + (VarContentEnd - VarContentStart); + return ContentEnd - ContentStart + (VarContentEnd - VarContentStart); } //== Fixup-related functions manage parent's storage using FixupStart and @@ -211,7 +224,7 @@ public: // Source fixup offsets are relative to the variable part's start. // Stored fixup offsets are relative to the fixed part's start. - void setVarFixups(ArrayRef<MCFixup> Fixups); + LLVM_ABI void setVarFixups(ArrayRef<MCFixup> Fixups); void clearVarFixups() { setVarFixups({}); } MutableArrayRef<MCFixup> getVarFixups(); ArrayRef<MCFixup> getVarFixups() const; @@ -621,11 +634,28 @@ public: bool isBssSection() const { return IsBss; } }; +inline SmallVectorImpl<char> &MCFragment::getContentsForAppending() { + SmallVectorImpl<char> &S = getParent()->ContentStorage; + if (LLVM_UNLIKELY(ContentEnd != S.size())) { + // Move the elements to the end. Reserve space to avoid invalidating + // S.begin()+I for `append`. 
+ auto Size = ContentEnd - ContentStart; + auto I = std::exchange(ContentStart, S.size()); + S.reserve(S.size() + Size); + S.append(S.begin() + I, S.begin() + I + Size); + } + return S; +} +inline void MCFragment::doneAppending() { + ContentEnd = getParent()->ContentStorage.size(); +} inline MutableArrayRef<char> MCFragment::getContents() { - return {reinterpret_cast<char *>(this + 1), FixedSize}; + return MutableArrayRef(getParent()->ContentStorage) + .slice(ContentStart, ContentEnd - ContentStart); } inline ArrayRef<char> MCFragment::getContents() const { - return {reinterpret_cast<const char *>(this + 1), FixedSize}; + return ArrayRef(getParent()->ContentStorage) + .slice(ContentStart, ContentEnd - ContentStart); } inline MutableArrayRef<char> MCFragment::getVarContents() { diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h index cf4fe20..245e7ba 100644 --- a/llvm/include/llvm/Object/SFrameParser.h +++ b/llvm/include/llvm/Object/SFrameParser.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/BinaryFormat/SFrame.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include <cstdint> @@ -19,11 +20,14 @@ namespace object { template <endianness E> class SFrameParser { public: - static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents); + static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents, + uint64_t SectionAddress); const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; } const sframe::Header<E> &getHeader() const { return Header; } + Expected<ArrayRef<uint8_t>> getAuxHeader() const; + bool usesFixedRAOffset() const { return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle; } @@ -31,16 +35,29 @@ public: return false; // Not used in any currently defined ABI. 
} + using FDERange = ArrayRef<sframe::FuncDescEntry<E>>; + Expected<FDERange> fdes() const; + + // Decodes the start address of the given FDE, which must be one of the + // objects returned by the `fdes()` function. + uint64_t getAbsoluteStartAddress(typename FDERange::iterator FDE) const; + private: ArrayRef<uint8_t> Data; + uint64_t SectionAddress; const sframe::Header<E> &Header; - SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header) - : Data(Data), Header(Header) {} + SFrameParser(ArrayRef<uint8_t> Data, uint64_t SectionAddress, + const sframe::Header<E> &Header) + : Data(Data), SectionAddress(SectionAddress), Header(Header) {} + + uint64_t getFDEBase() const { + return sizeof(Header) + Header.AuxHdrLen + Header.FDEOff; + } }; -extern template class SFrameParser<endianness::big>; -extern template class SFrameParser<endianness::little>; +extern template class LLVM_TEMPLATE_ABI SFrameParser<endianness::big>; +extern template class LLVM_TEMPLATE_ABI SFrameParser<endianness::little>; } // end namespace object } // end namespace llvm diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index e883f2f..3bf8c29 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -117,7 +117,7 @@ struct FileHeader { llvm::yaml::Hex8 ABIVersion; ELF_ET Type; std::optional<ELF_EM> Machine; - ELF_EF Flags; + std::optional<ELF_EF> Flags; llvm::yaml::Hex64 Entry; std::optional<StringRef> SectionHeaderStringTable; diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index b0360f1..97c3ff8 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -114,13 +114,16 @@ #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" +#include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include 
"llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LoopTermFold.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" +#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/LowerInvoke.h" #include <cassert> @@ -754,7 +757,12 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses( // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableLSR) { - addPass(createFunctionToLoopPassAdaptor(LoopStrengthReducePass(), + LoopPassManager LPM; + LPM.addPass(CanonicalizeFreezeInLoopsPass()); + LPM.addPass(LoopStrengthReducePass()); + if (Opt.EnableLoopTermFold) + LPM.addPass(LoopTermFoldPass()); + addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true)); } @@ -799,7 +807,8 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses( addPass(ScalarizeMaskedMemIntrinPass()); // Expand reduction intrinsics into shuffle sequences if the target wants to. - addPass(ExpandReductionsPass()); + if (!Opt.DisableExpandReductions) + addPass(ExpandReductionsPass()); // Convert conditional moves to conditional jumps when profitable. if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize) @@ -877,6 +886,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare( if (Opt.RequiresCodeGenSCCOrder) addPass.requireCGSCCOrder(); + if (getOptLevel() != CodeGenOptLevel::None) + addPass(ObjCARCContractPass()); + addPass(CallBrPreparePass()); // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. 
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index 3f785bd..26baddd 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -33,11 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 4ULL +#define MEMPROF_RAW_VERSION 5ULL // Currently supported versions. -#define MEMPROF_RAW_SUPPORTED_VERSIONS \ - { 3ULL, 4ULL } +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) { } __attribute__((__packed__)); #endif +constexpr int MantissaBits = 12; +constexpr int ExponentBits = 4; +constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1; +constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1; +constexpr uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa) + << MaxExponent; + +// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format. +inline uint16_t encodeHistogramCount(uint64_t Count) { + if (Count == 0) + return 0; + + if (Count > MaxRepresentableValue) + Count = MaxRepresentableValue; + + if (Count <= MaxMantissa) + return Count; + + uint64_t M = Count; + uint16_t E = 0; + while (M > MaxMantissa) { + M = (M + 1) >> 1; + E++; + } + return (E << MantissaBits) | static_cast<uint16_t>(M); +} + +// Decodes a 16-bit scaled integer and returns the +// decoded 64-bit unsigned integer. 
+inline uint64_t decodeHistogramCount(uint16_t EncodedValue) { + const uint16_t E = EncodedValue >> MantissaBits; + const uint16_t M = EncodedValue & MaxMantissa; + return static_cast<uint64_t>(M) << E; +} + } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/Support/AArch64AttributeParser.h b/llvm/include/llvm/Support/AArch64AttributeParser.h index 796dbfd..f4552ef 100644 --- a/llvm/include/llvm/Support/AArch64AttributeParser.h +++ b/llvm/include/llvm/Support/AArch64AttributeParser.h @@ -34,7 +34,7 @@ struct AArch64BuildAttrSubsections { uint32_t AndFeatures = 0; }; -AArch64BuildAttrSubsections +LLVM_ABI AArch64BuildAttrSubsections extractBuildAttributesSubsections(const llvm::AArch64AttributeParser &); } // namespace llvm diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index 8fca2d5..a331295 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -61,8 +61,10 @@ namespace llvm { for (bool _c = \ (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ _c; _c = false) \ + for (::llvm::impl::RAIINewLineStream NewLineStream{(STREAM)}; _c; \ + _c = false) \ ::llvm::impl::raw_ldbg_ostream{ \ - ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)} \ + ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), NewLineStream} \ .asLvalue() #define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ @@ -81,14 +83,15 @@ namespace llvm { namespace impl { -/// A raw_ostream that tracks `\n` and print the prefix. +/// A raw_ostream that tracks `\n` and prints the prefix after each +/// newline. class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; - bool HasPendingNewline = true; + bool HasPendingNewline; - /// Split the line on newlines and insert the prefix before each newline. - /// Forward everything to the underlying stream. 
+ /// Split the line on newlines and insert the prefix before each + /// newline. Forward everything to the underlying stream. void write_impl(const char *Ptr, size_t Size) final { auto Str = StringRef(Ptr, Size); // Handle the initial prefix. @@ -109,22 +112,18 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { } void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); } void writeWithPrefix(StringRef Str) { - if (HasPendingNewline) { - emitPrefix(); - HasPendingNewline = false; - } + flushEol(); Os.write(Str.data(), Str.size()); } public: - explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os) - : Prefix(std::move(Prefix)), Os(Os) { + explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, + bool HasPendingNewline = true) + : Prefix(std::move(Prefix)), Os(Os), + HasPendingNewline(HasPendingNewline) { SetUnbuffered(); } - ~raw_ldbg_ostream() final { - flushEol(); - Os << '\n'; - } + ~raw_ldbg_ostream() final { flushEol(); } void flushEol() { if (HasPendingNewline) { emitPrefix(); @@ -135,10 +134,22 @@ public: /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } - /// Some of the `<<` operators expect an lvalue, so we trick the type system. + /// Some of the `<<` operators expect an lvalue, so we trick the type + /// system. raw_ldbg_ostream &asLvalue() { return *this; } }; +/// A raw_ostream that prints a newline on destruction, useful for LDBG() +class RAIINewLineStream final : public raw_ostream { + raw_ostream &Os; + +public: + RAIINewLineStream(raw_ostream &Os) : Os(Os) { SetUnbuffered(); } + ~RAIINewLineStream() { Os << '\n'; } + void write_impl(const char *Ptr, size_t Size) final { Os.write(Ptr, Size); } + uint64_t current_pos() const final { return Os.tell(); } +}; + /// Remove the path prefix from the file name. 
static LLVM_ATTRIBUTE_UNUSED constexpr const char * getShortFileName(const char *path) { diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 734b795..d976773 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -1069,7 +1069,7 @@ public: /// Redirect each of the remapped files from first to second. static std::unique_ptr<RedirectingFileSystem> create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, - bool UseExternalNames, FileSystem &ExternalFS); + bool UseExternalNames, IntrusiveRefCntPtr<FileSystem> ExternalFS); ErrorOr<Status> status(const Twine &Path) override; bool exists(const Twine &Path) override; diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h index ffc6fdf..f35e7b5 100644 --- a/llvm/include/llvm/Support/Windows/WindowsSupport.h +++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h @@ -245,6 +245,10 @@ LLVM_ABI std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16, size_t MaxPathLen = MAX_PATH); +/// Retrieves the handle to an in-memory system module such as ntdll.dll, while +/// ensuring we're not retrieving a malicious injected module but a module +/// loaded from the system path. +LLVM_ABI HMODULE loadSystemModuleSecure(LPCWSTR lpModuleName); } // end namespace windows } // end namespace sys } // end namespace llvm. diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index f29cbe7..8d0a7e6 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -52,6 +52,8 @@ struct CGPassBuilderOption { bool EnableMachineFunctionSplitter = false; bool EnableSinkAndFold = false; bool EnableTailMerge = true; + /// Enable LoopTermFold immediately after LSR. 
+ bool EnableLoopTermFold = false; bool MISchedPostRA = false; bool EarlyLiveIntervals = false; bool GCEmptyBlocks = false; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index fc81ab7..b619de3 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -194,6 +194,7 @@ def IsExact : MIFlagEnum<"IsExact">; def NoSWrap : MIFlagEnum<"NoSWrap">; def NoUWrap : MIFlagEnum<"NoUWrap">; def NonNeg : MIFlagEnum<"NonNeg">; +def InBounds : MIFlagEnum<"InBounds">; def MIFlags; // def not; -> Already defined as a SDNode diff --git a/llvm/include/llvm/TextAPI/SymbolSet.h b/llvm/include/llvm/TextAPI/SymbolSet.h index a04cb35..42c411a 100644 --- a/llvm/include/llvm/TextAPI/SymbolSet.h +++ b/llvm/include/llvm/TextAPI/SymbolSet.h @@ -92,7 +92,7 @@ private: public: SymbolSet() = default; - ~SymbolSet(); + LLVM_ABI ~SymbolSet(); LLVM_ABI Symbol *addGlobal(EncodeKind Kind, StringRef Name, SymbolFlags Flags, const Target &Targ); size_t size() const { return Symbols.size(); } diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 719c0ee..e57032a 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6494,7 +6494,7 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { } constexpr static const std::optional<TypeSize> HasNoAllocationSize = - std::optional<TypeSize>(TypeSize(-1, true)); + std::make_optional<TypeSize>(-1, true); LLVM_ABI static const char ID; }; diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h index a5d13766..749f6ee 100644 --- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h +++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h @@ -28,6 +28,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" 
+#include "llvm/Support/Compiler.h" #include <deque> namespace llvm { @@ -96,7 +97,7 @@ protected: bool MadeChange; public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &); + LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &); private: void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT); diff --git a/llvm/include/llvm/Transforms/Utils/Mem2Reg.h b/llvm/include/llvm/Transforms/Utils/Mem2Reg.h index 76c1c2c..d0006bf 100644 --- a/llvm/include/llvm/Transforms/Utils/Mem2Reg.h +++ b/llvm/include/llvm/Transforms/Utils/Mem2Reg.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_UTILS_MEM2REG_H #include "llvm/IR/PassManager.h" +#include "llvm/Support/Compiler.h" namespace llvm { @@ -22,7 +23,7 @@ class Function; class PromotePass : public PassInfoMixin<PromotePass> { public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/ProfileVerify.h b/llvm/include/llvm/Transforms/Utils/ProfileVerify.h index 7834305..5c9c44c 100644 --- a/llvm/include/llvm/Transforms/Utils/ProfileVerify.h +++ b/llvm/include/llvm/Transforms/Utils/ProfileVerify.h @@ -15,13 +15,14 @@ #include "llvm/IR/Analysis.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/Compiler.h" namespace llvm { /// Inject MD_prof metadata where it's missing. Used for testing that passes /// don't accidentally drop this metadata. class ProfileInjectorPass : public PassInfoMixin<ProfileInjectorPass> { public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; /// Checks that MD_prof is present on every instruction that supports it. Used @@ -29,7 +30,7 @@ public: /// valid (i.e. 
!{!"unknown"}) class ProfileVerifierPass : public PassInfoMixin<ProfileVerifierPass> { public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index cba3736..43ff084 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -400,19 +400,11 @@ public: /// Returns true if the loop has exactly one uncountable early exit, i.e. an /// uncountable exit that isn't the latch block. - bool hasUncountableEarlyExit() const { - return getUncountableEdge().has_value(); - } + bool hasUncountableEarlyExit() const { return UncountableExitingBB; } /// Returns the uncountable early exiting block, if there is exactly one. BasicBlock *getUncountableEarlyExitingBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->first : nullptr; - } - - /// Returns the destination of the uncountable early exiting block, if there - /// is exactly one. - BasicBlock *getUncountableEarlyExitBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr; + return UncountableExitingBB; } /// Return true if there is store-load forwarding dependencies. @@ -473,13 +465,6 @@ public: return CountableExitingBlocks; } - /// Returns the loop edge to an uncountable exit, or std::nullopt if there - /// isn't a single such edge. - std::optional<std::pair<BasicBlock *, BasicBlock *>> - getUncountableEdge() const { - return UncountableEdge; - } - private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal @@ -659,9 +644,9 @@ private: /// the exact backedge taken count is not computable. 
SmallVector<BasicBlock *, 4> CountableExitingBlocks; - /// Keep track of the loop edge to an uncountable exit, comprising a pair - /// of (Exiting, Exit) blocks, if there is exactly one early exit. - std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge; + /// Keep track of an uncountable exiting block, if there is exactly one early + /// exit. + BasicBlock *UncountableExitingBB = nullptr; }; } // namespace llvm |