diff options
Diffstat (limited to 'llvm/include')
34 files changed, 382 insertions, 105 deletions
| diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index 4039078..00f85ca 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -561,6 +561,17 @@ auto GenericCycleInfo<ContextT>::getSmallestCommonCycle(CycleT *A,    return A;  } +/// \brief Find the innermost cycle containing both given blocks. +/// +/// \returns the innermost cycle containing both \p A and \p B +///          or nullptr if there is no such cycle. +template <typename ContextT> +auto GenericCycleInfo<ContextT>::getSmallestCommonCycle(BlockT *A, +                                                        BlockT *B) const +    -> CycleT * { +  return getSmallestCommonCycle(getCycle(A), getCycle(B)); +} +  /// \brief get the depth for the cycle which containing a given block.  ///  /// \returns the depth for the innermost cycle containing \p Block or 0 if it is diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index b8b6e3e..c31bab3 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -298,6 +298,7 @@ public:    CycleT *getCycle(const BlockT *Block) const;    CycleT *getSmallestCommonCycle(CycleT *A, CycleT *B) const; +  CycleT *getSmallestCommonCycle(BlockT *A, BlockT *B) const;    unsigned getCycleDepth(const BlockT *Block) const;    CycleT *getTopLevelParentCycle(BlockT *Block); diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h index 5657303..50ca1d5 100644 --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -111,6 +111,7 @@ public:        return std::move(*result);      return defaultFn(this->value);    } +    /// As a default, return the given value.    
[[nodiscard]] ResultT Default(ResultT defaultResult) {      if (result) @@ -118,6 +119,22 @@ public:      return defaultResult;    } +  /// Default for pointer-like results types that accept `nullptr`. +  template <typename ArgT = ResultT, +            typename = +                std::enable_if_t<std::is_constructible_v<ArgT, std::nullptr_t>>> +  [[nodiscard]] ResultT Default(std::nullptr_t) { +    return Default(ResultT(nullptr)); +  } + +  /// Default for optional results types that accept `std::nullopt`. +  template <typename ArgT = ResultT, +            typename = +                std::enable_if_t<std::is_constructible_v<ArgT, std::nullopt_t>>> +  [[nodiscard]] ResultT Default(std::nullopt_t) { +    return Default(ResultT(std::nullopt)); +  } +    /// Declare default as unreachable, making sure that all cases were handled.    [[nodiscard]] ResultT DefaultUnreachable(        const char *message = "Fell off the end of a type-switch") { diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 5ad6288..71055dd16 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -161,7 +161,7 @@ private:  public:    /// Default constructor creates empty storage (invalid state) -  VocabStorage() : Sections(), TotalSize(0), Dimension(0) {} +  VocabStorage() = default;    /// Create a VocabStorage with pre-organized section data    VocabStorage(std::vector<std::vector<Embedding>> &&SectionData); diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 6ee6b666..39e9611 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1125,6 +1125,8 @@ struct Elf64_Shdr {    Elf64_Xword sh_entsize;  }; +enum { PN_XNUM = 0xffff }; +  // Special section indices.  
enum {    SHN_UNDEF = 0,          // Undefined, missing, irrelevant, or meaningless diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 48650a6..8237530 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -54,6 +54,10 @@ struct FunctionPathAndClusterInfo {    DenseMap<UniqueBBID, uint64_t> NodeCounts;    // Edge counts for each edge, stored as a nested map.    DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts; +  // Hash for each basic block. The Hashes are stored for every original block +  // (not cloned blocks), hence the map key being unsigned instead of +  // UniqueBBID. +  DenseMap<unsigned, uint64_t> BBHashes;  };  class BasicBlockSectionsProfileReader { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 76b6c8e..e8dbc96 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -594,12 +594,13 @@ public:      // Check if suitable for a bit test      if (N <= DL.getIndexSizeInBits(0u)) { -      SmallPtrSet<const BasicBlock *, 4> Dests; -      for (auto I : SI.cases()) -        Dests.insert(I.getCaseSuccessor()); +      DenseMap<const BasicBlock *, unsigned int> DestMap; +      for (auto I : SI.cases()) { +        const BasicBlock *BB = I.getCaseSuccessor(); +        ++DestMap[BB]; +      } -      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, -                                     DL)) +      if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))          return 1;      } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index b0601eb..36cb90b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ 
b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -640,7 +640,8 @@ public:    /// This variant does not erase \p MI after calling the build function.    void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const; -  bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const; +  bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, +                                 BuildFnTy &MatchInfo) const;    bool matchFunnelShiftToRotate(MachineInstr &MI) const;    void applyFunnelShiftToRotate(MachineInstr &MI) const;    bool matchRotateOutOfRange(MachineInstr &MI) const; diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h index a9e53ba..f980d3d 100644 --- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h +++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h @@ -84,6 +84,10 @@ LLVM_ABI Libcall getSINCOS(EVT RetVT);  /// UNKNOWN_LIBCALL if there is none.  LLVM_ABI Libcall getSINCOSPI(EVT RetVT); +/// Return the SINCOS_STRET_ value for the given types, or UNKNOWN_LIBCALL if +/// there is none. +LLVM_ABI Libcall getSINCOS_STRET(EVT RetVT); +  /// getMODF - Return the MODF_* value for the given types, or  /// UNKNOWN_LIBCALL if there is none.  
LLVM_ABI Libcall getMODF(EVT RetVT); diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 0dcf400..9a6bf5f 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -583,6 +583,18 @@ m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) {    return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx);  } +template <typename T0_P, typename T1_P, typename T2_P> +inline TernaryOpc_match<T0_P, T1_P, T2_P> +m_TernaryOp(unsigned Opc, const T0_P &Op0, const T1_P &Op1, const T2_P &Op2) { +  return TernaryOpc_match<T0_P, T1_P, T2_P>(Opc, Op0, Op1, Op2); +} + +template <typename T0_P, typename T1_P, typename T2_P> +inline TernaryOpc_match<T0_P, T1_P, T2_P, true> +m_c_TernaryOp(unsigned Opc, const T0_P &Op0, const T1_P &Op1, const T2_P &Op2) { +  return TernaryOpc_match<T0_P, T1_P, T2_P, true>(Opc, Op0, Op1, Op2); +} +  template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy>  inline auto m_SelectCC(const LTy &L, const RTy &R, const TTy &T, const FTy &F,                         const CCTy &CC) { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index d6ed3a8..1920b98 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1433,9 +1433,9 @@ public:    /// \p High as its lowest and highest case values, and expects \p NumCmps    /// case value comparisons. Check if the number of destinations, comparison    /// metric, and range are all suitable. 
-  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, -                             const APInt &Low, const APInt &High, -                             const DataLayout &DL) const { +  bool isSuitableForBitTests( +      const DenseMap<const BasicBlock *, unsigned int> &DestCmps, +      const APInt &Low, const APInt &High, const DataLayout &DL) const {      // FIXME: I don't think NumCmps is the correct metric: a single case and a      // range of cases both require only one branch to lower. Just looking at the      // number of clusters and destinations should be enough to decide whether to @@ -1446,6 +1446,20 @@ public:      if (!rangeFitsInWord(Low, High, DL))        return false; +    unsigned NumDests = DestCmps.size(); +    unsigned NumCmps = 0; +    unsigned int MaxBitTestEntry = 0; +    for (auto &DestCmp : DestCmps) { +      NumCmps += DestCmp.second; +      if (DestCmp.second > MaxBitTestEntry) +        MaxBitTestEntry = DestCmp.second; +    } + +    // Comparisons might be cheaper for small number of comparisons, which can +    // be Arch Target specific. +    if (MaxBitTestEntry < getMinimumBitTestCmps()) +      return false; +      // Decide whether it's profitable to lower this range with bit tests. Each      // destination requires a bit test and branch, and there is an overall range      // check branch. For a small number of clusters, separate comparisons might @@ -2055,6 +2069,9 @@ public:    virtual bool isJumpTableRelative() const; +  /// Return the minimum of largest number of comparisons in BitTest. +  unsigned getMinimumBitTestCmps() const; +    /// If a physical register, this specifies the register that    /// llvm.savestack/llvm.restorestack should save and restore.    Register getStackPointerRegisterToSaveRestore() const { @@ -2577,6 +2594,9 @@ protected:    /// Set to zero to generate unlimited jump tables.    
void setMaximumJumpTableSize(unsigned); +  /// Set the minimum of largest number of comparisons to generate BitTest. +  void setMinimumBitTestCmps(unsigned Val); +    /// If set to a physical register, this specifies the register that    /// llvm.savestack/llvm.restorestack should save and restore.    void setStackPointerRegisterToSaveRestore(Register R) { @@ -3719,6 +3739,9 @@ private:    /// backend supports.    unsigned MinCmpXchgSizeInBits; +  /// The minimum of largest number of comparisons to use bit test for switch. +  unsigned MinimumBitTestCmps; +    /// This indicates if the target supports unaligned atomic operations.    bool SupportsUnalignedAtomics; @@ -3738,7 +3761,7 @@ private:    /// register class is the largest legal super-reg register class of the    /// register class of the specified type. e.g. On x86, i8, i16, and i32's    /// representative class would be GR32. -  const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0}; +  const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {nullptr};    /// This indicates the "cost" of the "representative" register class for each    /// ValueType. The cost is used by the scheduler to approximate register diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h index c69b6f7..8620726 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h @@ -6,7 +6,7 @@  //  //===----------------------------------------------------------------------===//  // -// Implements ExecutorProcessControl::MemoryAccess by making calls to +// Implements the MemoryAccess interface by making calls to  // ExecutorProcessControl::callWrapperAsync.  
//  // This simplifies the implementation of new ExecutorProcessControl instances, @@ -19,6 +19,7 @@  #define LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H  #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/MemoryAccess.h"  namespace llvm {  namespace orc { diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 87b9520..d7f0e3a3d4 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1167,6 +1167,14 @@ struct ThreadsT {    using EmptyTrait = std::true_type;  }; +// V6.0: [14.8] `threadset` clause +template <typename T, typename I, typename E> // +struct ThreadsetT { +  ENUM(ThreadsetPolicy, Omp_Pool, Omp_Team); +  using WrapperTrait = std::true_type; +  ThreadsetPolicy v; +}; +  // V5.2: [5.9.1] `to` clause  template <typename T, typename I, typename E> //  struct ToT { @@ -1352,9 +1360,9 @@ using WrapperClausesT = std::variant<      ProcBindT<T, I, E>, ReverseOffloadT<T, I, E>, SafelenT<T, I, E>,      SelfMapsT<T, I, E>, SeverityT<T, I, E>, SharedT<T, I, E>, SimdlenT<T, I, E>,      SizesT<T, I, E>, PermutationT<T, I, E>, ThreadLimitT<T, I, E>, -    UnifiedAddressT<T, I, E>, UnifiedSharedMemoryT<T, I, E>, UniformT<T, I, E>, -    UpdateT<T, I, E>, UseDeviceAddrT<T, I, E>, UseDevicePtrT<T, I, E>, -    UsesAllocatorsT<T, I, E>>; +    ThreadsetT<T, I, E>, UnifiedAddressT<T, I, E>, +    UnifiedSharedMemoryT<T, I, E>, UniformT<T, I, E>, UpdateT<T, I, E>, +    UseDeviceAddrT<T, I, E>, UseDevicePtrT<T, I, E>, UsesAllocatorsT<T, I, E>>;  template <typename T, typename I, typename E>  using UnionOfAllClausesT = typename type::Union< // diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 61a1a05..208609f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -539,6 +539,10 @@ def OMPC_GroupPrivate : Clause<[Spelling<"groupprivate">]> {  
def OMPC_Threads : Clause<[Spelling<"threads">]> {    let clangClass = "OMPThreadsClause";  } +def OMPC_Threadset : Clause<[Spelling<"threadset">]> { +  let clangClass = "OMPThreadsetClause"; +  let flangClass = "OmpThreadsetClause"; +}  def OMPC_To : Clause<[Spelling<"to">]> {    let clangClass = "OMPToClause";    let flangClass = "OmpToClause"; @@ -1254,6 +1258,7 @@ def OMP_Task : Directive<[Spelling<"task">]> {      VersionedClause<OMPC_Final>,      VersionedClause<OMPC_If>,      VersionedClause<OMPC_Priority>, +    VersionedClause<OMPC_Threadset, 60>,      VersionedClause<OMPC_Replayable, 60>,      VersionedClause<OMPC_Transparent, 60>,    ]; @@ -1297,6 +1302,7 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> {      VersionedClause<OMPC_Final>,      VersionedClause<OMPC_If>,      VersionedClause<OMPC_Priority>, +    VersionedClause<OMPC_Threadset, 60>,      VersionedClause<OMPC_Replayable, 60>,      VersionedClause<OMPC_Transparent, 60>,    ]; diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h index 9e24ae7..f431e1d 100644 --- a/llvm/include/llvm/IR/AbstractCallSite.h +++ b/llvm/include/llvm/IR/AbstractCallSite.h @@ -137,7 +137,7 @@ public:    /// Return true if @p U is the use that defines the callee of this ACS.    bool isCallee(const Use *U) const { -    if (isDirectCall()) +    if (!isCallbackCall())        return CB->isCallee(U);      assert(!CI.ParameterEncoding.empty() && @@ -154,7 +154,7 @@ public:    /// Return the number of parameters of the callee.    unsigned getNumArgOperands() const { -    if (isDirectCall()) +    if (!isCallbackCall())        return CB->arg_size();      // Subtract 1 for the callee encoding.      return CI.ParameterEncoding.size() - 1; @@ -169,7 +169,7 @@ public:    /// Return the operand index of the underlying instruction associated with    /// the function parameter number @p ArgNo or -1 if there is none.    
int getCallArgOperandNo(unsigned ArgNo) const { -    if (isDirectCall()) +    if (!isCallbackCall())        return ArgNo;      // Add 1 for the callee encoding.      return CI.ParameterEncoding[ArgNo + 1]; @@ -183,7 +183,7 @@ public:    /// Return the operand of the underlying instruction associated with the    /// function parameter number @p ArgNo or nullptr if there is none.    Value *getCallArgOperand(unsigned ArgNo) const { -    if (isDirectCall()) +    if (!isCallbackCall())        return CB->getArgOperand(ArgNo);      // Add 1 for the callee encoding.      return CI.ParameterEncoding[ArgNo + 1] >= 0 @@ -210,7 +210,7 @@ public:    /// Return the pointer to function that is being called.    Value *getCalledOperand() const { -    if (isDirectCall()) +    if (!isCallbackCall())        return CB->getCalledOperand();      return CB->getArgOperand(getCallArgOperandNoForCallee());    } diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index f3839c9..4228ec9 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -209,10 +209,15 @@ namespace llvm {      /// \param NumExtraInhabitants The number of extra inhabitants of the type.      /// An extra inhabitant is a bit pattern that does not represent a valid      /// value for instances of a given type. This is used by the Swift language. +    /// \param DataSizeInBits Optionally describes the number of bits used by +    /// the value of the object when this is less than the storage size of +    /// SizeInBits. Default value of zero indicates the object value and storage +    /// sizes are equal.      
LLVM_ABI DIBasicType *      createBasicType(StringRef Name, uint64_t SizeInBits, unsigned Encoding,                      DINode::DIFlags Flags = DINode::FlagZero, -                    uint32_t NumExtraInhabitants = 0); +                    uint32_t NumExtraInhabitants = 0, +                    uint32_t DataSizeInBits = 0);      /// Create debugging information entry for a binary fixed-point type.      /// \param Name        Type name. diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index c626efc..7ade6b8 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -891,96 +891,114 @@ class DIBasicType : public DIType {    friend class MDNode;    unsigned Encoding; +  /// Describes the number of bits used by the value of the object. Non-zero +  /// when the value of an object does not fully occupy the storage size +  /// specified by SizeInBits. +  uint32_t DataSizeInBits;  protected:    DIBasicType(LLVMContext &C, StorageType Storage, unsigned Tag,                uint32_t AlignInBits, unsigned Encoding, -              uint32_t NumExtraInhabitants, DIFlags Flags, -              ArrayRef<Metadata *> Ops) +              uint32_t NumExtraInhabitants, uint32_t DataSizeInBits, +              DIFlags Flags, ArrayRef<Metadata *> Ops)        : DIType(C, DIBasicTypeKind, Storage, Tag, 0, AlignInBits,                 NumExtraInhabitants, Flags, Ops), -        Encoding(Encoding) {} +        Encoding(Encoding), DataSizeInBits(DataSizeInBits) {}    DIBasicType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,                uint32_t AlignInBits, unsigned Encoding, -              uint32_t NumExtraInhabitants, DIFlags Flags, -              ArrayRef<Metadata *> Ops) +              uint32_t NumExtraInhabitants, uint32_t DataSizeInBits, +              DIFlags Flags, ArrayRef<Metadata *> Ops)        : DIType(C, ID, Storage, Tag, 0, AlignInBits, NumExtraInhabitants, Flags,          
       Ops), -        Encoding(Encoding) {} +        Encoding(Encoding), DataSizeInBits(DataSizeInBits) {}    ~DIBasicType() = default;    static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,                                StringRef Name, uint64_t SizeInBits,                                uint32_t AlignInBits, unsigned Encoding, -                              uint32_t NumExtraInhabitants, DIFlags Flags, +                              uint32_t NumExtraInhabitants, +                              uint32_t DataSizeInBits, DIFlags Flags,                                StorageType Storage, bool ShouldCreate = true) {      return getImpl(Context, Tag, getCanonicalMDString(Context, Name),                     SizeInBits, AlignInBits, Encoding, NumExtraInhabitants, -                   Flags, Storage, ShouldCreate); +                   DataSizeInBits, Flags, Storage, ShouldCreate);    }    static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,                                MDString *Name, uint64_t SizeInBits,                                uint32_t AlignInBits, unsigned Encoding, -                              uint32_t NumExtraInhabitants, DIFlags Flags, +                              uint32_t NumExtraInhabitants, +                              uint32_t DataSizeInBits, DIFlags Flags,                                StorageType Storage, bool ShouldCreate = true) {      auto *SizeInBitsNode = ConstantAsMetadata::get(          ConstantInt::get(Type::getInt64Ty(Context), SizeInBits));      return getImpl(Context, Tag, Name, SizeInBitsNode, AlignInBits, Encoding, -                   NumExtraInhabitants, Flags, Storage, ShouldCreate); +                   NumExtraInhabitants, DataSizeInBits, Flags, Storage, +                   ShouldCreate);    } -  LLVM_ABI static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag, -                                       MDString *Name, Metadata *SizeInBits, -                                       uint32_t AlignInBits, 
unsigned Encoding, -                                       uint32_t NumExtraInhabitants, -                                       DIFlags Flags, StorageType Storage, -                                       bool ShouldCreate = true); +  LLVM_ABI static DIBasicType * +  getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, +          Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, +          uint32_t NumExtraInhabitants, uint32_t DataSizeInBits, DIFlags Flags, +          StorageType Storage, bool ShouldCreate = true);    TempDIBasicType cloneImpl() const {      return getTemporary(getContext(), getTag(), getRawName(),                          getRawSizeInBits(), getAlignInBits(), getEncoding(), -                        getNumExtraInhabitants(), getFlags()); +                        getNumExtraInhabitants(), getDataSizeInBits(), +                        getFlags());    }  public:    DEFINE_MDNODE_GET(DIBasicType, (unsigned Tag, StringRef Name), -                    (Tag, Name, 0, 0, 0, 0, FlagZero)) +                    (Tag, Name, 0, 0, 0, 0, 0, FlagZero))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, StringRef Name, uint64_t SizeInBits), -                    (Tag, Name, SizeInBits, 0, 0, 0, FlagZero)) +                    (Tag, Name, SizeInBits, 0, 0, 0, 0, FlagZero))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, MDString *Name, uint64_t SizeInBits), -                    (Tag, Name, SizeInBits, 0, 0, 0, FlagZero)) +                    (Tag, Name, SizeInBits, 0, 0, 0, 0, FlagZero))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, StringRef Name, uint64_t SizeInBits,                       uint32_t AlignInBits, unsigned Encoding, DIFlags Flags), -                    (Tag, Name, SizeInBits, AlignInBits, Encoding, 0, Flags)) +                    (Tag, Name, SizeInBits, AlignInBits, Encoding, 0, 0, Flags))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, MDString 
*Name, uint64_t SizeInBits,                       uint32_t AlignInBits, unsigned Encoding, DIFlags Flags), -                    (Tag, Name, SizeInBits, AlignInBits, Encoding, 0, Flags)) +                    (Tag, Name, SizeInBits, AlignInBits, Encoding, 0, 0, Flags))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, StringRef Name, uint64_t SizeInBits,                       uint32_t AlignInBits, unsigned Encoding,                       uint32_t NumExtraInhabitants, DIFlags Flags),                      (Tag, Name, SizeInBits, AlignInBits, Encoding, -                     NumExtraInhabitants, Flags)) +                     NumExtraInhabitants, 0, Flags)) +  DEFINE_MDNODE_GET(DIBasicType, +                    (unsigned Tag, StringRef Name, uint64_t SizeInBits, +                     uint32_t AlignInBits, unsigned Encoding, +                     uint32_t NumExtraInhabitants, uint32_t DataSizeInBits, +                     DIFlags Flags), +                    (Tag, Name, SizeInBits, AlignInBits, Encoding, +                     NumExtraInhabitants, DataSizeInBits, Flags))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, MDString *Name, uint64_t SizeInBits,                       uint32_t AlignInBits, unsigned Encoding, -                     uint32_t NumExtraInhabitants, DIFlags Flags), +                     uint32_t NumExtraInhabitants, uint32_t DataSizeInBits, +                     DIFlags Flags),                      (Tag, Name, SizeInBits, AlignInBits, Encoding, -                     NumExtraInhabitants, Flags)) +                     NumExtraInhabitants, DataSizeInBits, Flags))    DEFINE_MDNODE_GET(DIBasicType,                      (unsigned Tag, MDString *Name, Metadata *SizeInBits,                       uint32_t AlignInBits, unsigned Encoding, -                     uint32_t NumExtraInhabitants, DIFlags Flags), +                     uint32_t NumExtraInhabitants, uint32_t DataSizeInBits, +                     DIFlags Flags),         
             (Tag, Name, SizeInBits, AlignInBits, Encoding, -                     NumExtraInhabitants, Flags)) +                     NumExtraInhabitants, DataSizeInBits, Flags))    TempDIBasicType clone() const { return cloneImpl(); }    unsigned getEncoding() const { return Encoding; } +  uint32_t getDataSizeInBits() const { return DataSizeInBits; } +    enum class Signedness { Signed, Unsigned };    /// Return the signedness of this type, or std::nullopt if this type is @@ -1010,7 +1028,7 @@ class DIFixedPointType : public DIBasicType {                     uint32_t AlignInBits, unsigned Encoding, DIFlags Flags,                     unsigned Kind, int Factor, ArrayRef<Metadata *> Ops)        : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, AlignInBits, -                    Encoding, 0, Flags, Ops), +                    Encoding, 0, 0, Flags, Ops),          Kind(Kind), Factor(Factor) {      assert(Kind == FixedPointBinary || Kind == FixedPointDecimal);    } @@ -1019,7 +1037,7 @@ class DIFixedPointType : public DIBasicType {                     unsigned Kind, APInt Numerator, APInt Denominator,                     ArrayRef<Metadata *> Ops)        : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, AlignInBits, -                    Encoding, 0, Flags, Ops), +                    Encoding, 0, 0, Flags, Ops),          Kind(Kind), Factor(0), Numerator(Numerator), Denominator(Denominator) {      assert(Kind == FixedPointRational);    } @@ -1028,7 +1046,7 @@ class DIFixedPointType : public DIBasicType {                     unsigned Kind, int Factor, APInt Numerator,                     APInt Denominator, ArrayRef<Metadata *> Ops)        : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, AlignInBits, -                    Encoding, 0, Flags, Ops), +                    Encoding, 0, 0, Flags, Ops),          Kind(Kind), Factor(Factor), Numerator(Numerator),          Denominator(Denominator) {}    ~DIFixedPointType() = default; diff --git a/llvm/include/llvm/IR/IRBuilder.h 
b/llvm/include/llvm/IR/IRBuilder.h index dacda0a..972a253 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2191,7 +2191,7 @@ public:                        FMFSource);    }    Value *CreatePtrToAddr(Value *V, const Twine &Name = "") { -    return CreateCast(Instruction::PtrToInt, V, +    return CreateCast(Instruction::PtrToAddr, V,                        BB->getDataLayout().getAddressType(V->getType()), Name);    }    Value *CreatePtrToInt(Value *V, Type *DestTy, diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 3b7077c..d6b8563 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -153,6 +153,8 @@ def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrCon  def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;  def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;  def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; +def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; +def int_dx_wave_reduce_umin : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;  def int_dx_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;  def int_dx_wave_reduce_usum : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;  def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 49a182be..bc51fb6 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -122,6 +122,8 @@ def int_spv_rsqrt 
: DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]    def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;    def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;    def int_spv_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; +  def int_spv_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; +  def int_spv_wave_reduce_umin : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;    def int_spv_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;    def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;    def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; @@ -136,7 +138,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]    def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;    def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; -  // Create resource handle given the binding information. Returns a  +  // Create resource handle given the binding information. Returns a    // type appropriate for the kind of resource given the set id, binding id,    // array size of the binding, as well as an index and an indicator    // whether that index may be non-uniform. 
diff --git a/llvm/include/llvm/IR/Mangler.h b/llvm/include/llvm/IR/Mangler.h index 232101a..4d387ba 100644 --- a/llvm/include/llvm/IR/Mangler.h +++ b/llvm/include/llvm/IR/Mangler.h @@ -80,8 +80,7 @@ getArm64ECDemangledFunctionName(StringRef Name);  /// Check if an ARM64EC function name is mangled.  bool inline isArm64ECMangledFunctionName(StringRef Name) { -  return Name[0] == '#' || -         (Name[0] == '?' && Name.find("@$$h") != StringRef::npos); +  return Name[0] == '#' || (Name[0] == '?' && Name.contains("@$$h"));  }  } // End llvm namespace diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 7be1b65..24c1b03 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -1585,7 +1585,7 @@ def __aeabi_f2ulz : RuntimeLibcallImpl<FPTOUINT_F32_I64>; // CallingConv::ARM_AA  // RTABI chapter 4.1.2, Table 7  def __aeabi_d2f : RuntimeLibcallImpl<FPROUND_F64_F32>; // CallingConv::ARM_AAPCS  def __aeabi_d2h : RuntimeLibcallImpl<FPROUND_F64_F16>; // CallingConv::ARM_AAPCS -def  __aeabi_f2d : RuntimeLibcallImpl<FPEXT_F32_F64>; // CallingConv::ARM_AAPCS +def __aeabi_f2d : RuntimeLibcallImpl<FPEXT_F32_F64>; // CallingConv::ARM_AAPCS  // Integer to floating-point conversions.  // RTABI chapter 4.1.2, Table 8 diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 59f63eb..03d5ee2 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -278,9 +278,46 @@ private:    std::vector<Elf_Shdr> FakeSections;    SmallString<0> FakeSectionStrings; +  // When the number of program headers is >= PN_XNUM, the actual number is +  // contained in the sh_info field of the section header at index 0. +  std::optional<uint32_t> RealPhNum; +  // When the number of section headers is >= SHN_LORESERVE, the actual number +  // is contained in the sh_size field of the section header at index 0. 
+  std::optional<uint64_t> RealShNum; +  // When the section index of the section name table is >= SHN_LORESERVE, the +  // actual number is contained in the sh_link field of the section header at +  // index 0. +  std::optional<uint32_t> RealShStrNdx; +    ELFFile(StringRef Object); +  Error readShdrZero(); +  public: +  Expected<uint32_t> getPhNum() const { +    if (!RealPhNum) { +      if (Error E = const_cast<ELFFile<ELFT> *>(this)->readShdrZero()) +        return std::move(E); +    } +    return *RealPhNum; +  } + +  Expected<uint64_t> getShNum() const { +    if (!RealShNum) { +      if (Error E = const_cast<ELFFile<ELFT> *>(this)->readShdrZero()) +        return std::move(E); +    } +    return *RealShNum; +  } + +  Expected<uint32_t> getShStrNdx() const { +    if (!RealShStrNdx) { +      if (Error E = const_cast<ELFFile<ELFT> *>(this)->readShdrZero()) +        return std::move(E); +    } +    return *RealShStrNdx; +  } +    const Elf_Ehdr &getHeader() const {      return *reinterpret_cast<const Elf_Ehdr *>(base());    } @@ -379,22 +416,26 @@ public:    /// Iterate over program header table.    
Expected<Elf_Phdr_Range> program_headers() const { -    if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr)) +    uint32_t NumPh; +    if (Expected<uint32_t> PhNumOrErr = getPhNum()) +      NumPh = *PhNumOrErr; +    else +      return PhNumOrErr.takeError(); +    if (NumPh && getHeader().e_phentsize != sizeof(Elf_Phdr))        return createError("invalid e_phentsize: " +                           Twine(getHeader().e_phentsize)); -    uint64_t HeadersSize = -        (uint64_t)getHeader().e_phnum * getHeader().e_phentsize; +    uint64_t HeadersSize = (uint64_t)NumPh * getHeader().e_phentsize;      uint64_t PhOff = getHeader().e_phoff;      if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())        return createError("program headers are longer than binary of size " +                           Twine(getBufSize()) + ": e_phoff = 0x" +                           Twine::utohexstr(getHeader().e_phoff) + -                         ", e_phnum = " + Twine(getHeader().e_phnum) + +                         ", e_phnum = " + Twine(NumPh) +                           ", e_phentsize = " + Twine(getHeader().e_phentsize));      auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff); -    return ArrayRef(Begin, Begin + getHeader().e_phnum); +    return ArrayRef(Begin, Begin + NumPh);    }    /// Get an iterator over notes in a program header. @@ -772,19 +813,15 @@ template <class ELFT>  Expected<StringRef>  ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,                                       WarningHandler WarnHandler) const { -  uint32_t Index = getHeader().e_shstrndx; -  if (Index == ELF::SHN_XINDEX) { -    // If the section name string table section index is greater than -    // or equal to SHN_LORESERVE, then the actual index of the section name -    // string table section is contained in the sh_link field of the section -    // header at index 0. 
-    if (Sections.empty()) -      return createError( -          "e_shstrndx == SHN_XINDEX, but the section header table is empty"); +  Expected<uint32_t> ShStrNdxOrErr = getShStrNdx(); +  if (!ShStrNdxOrErr) +    return ShStrNdxOrErr.takeError(); -    Index = Sections[0].sh_link; -  } +  if (*ShStrNdxOrErr == ELF::SHN_XINDEX && Sections.empty()) +    return createError( +        "e_shstrndx == SHN_XINDEX, but the section header table is empty"); +  uint32_t Index = *ShStrNdxOrErr;    // There is no section name string table. Return FakeSectionStrings which    // is non-empty if we have created fake sections.    if (!Index) @@ -891,6 +928,35 @@ Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {  template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {} +template <class ELFT> Error ELFFile<ELFT>::readShdrZero() { +  const Elf_Ehdr &Header = getHeader(); + +  if ((Header.e_phnum == ELF::PN_XNUM || Header.e_shnum == 0 || +       Header.e_shstrndx == ELF::SHN_XINDEX) && +      Header.e_shoff != 0) { +    // Pretend we have section 0 or sections() would call getShNum and thus +    // become an infinite recursion. +    RealShNum = 1; +    auto SecOrErr = getSection(0); +    if (!SecOrErr) { +      RealShNum = std::nullopt; +      return SecOrErr.takeError(); +    } + +    RealPhNum = +        Header.e_phnum == ELF::PN_XNUM ? (*SecOrErr)->sh_info : Header.e_phnum; +    RealShNum = Header.e_shnum == 0 ? (*SecOrErr)->sh_size : Header.e_shnum; +    RealShStrNdx = Header.e_shstrndx == ELF::SHN_XINDEX ? 
(*SecOrErr)->sh_link +                                                        : Header.e_shstrndx; +  } else { +    RealPhNum = Header.e_phnum; +    RealShNum = Header.e_shnum; +    RealShStrNdx = Header.e_shstrndx; +  } + +  return Error::success(); +} +  template <class ELFT>  Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {    if (sizeof(Elf_Ehdr) > Object.size()) @@ -956,9 +1022,11 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {    const Elf_Shdr *First =        reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset); -  uintX_t NumSections = getHeader().e_shnum; -  if (NumSections == 0) -    NumSections = First->sh_size; +  uintX_t NumSections = 0; +  if (Expected<uint64_t> ShNumOrErr = getShNum()) +    NumSections = *ShNumOrErr; +  else +    return ShNumOrErr.takeError();    if (NumSections > UINT64_MAX / sizeof(Elf_Shdr))      return createError("invalid number of sections specified in the NULL " diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index e9a417d..467ab6f 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -834,30 +834,32 @@ struct BBAddrMap {      bool OmitBBEntries : 1;      bool CallsiteEndOffsets : 1;      bool BBHash : 1; +    bool PostLinkCfg : 1;      bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }      bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; }      // Encodes to minimum bit width representation. 
-    uint8_t encode() const { -      return (static_cast<uint8_t>(FuncEntryCount) << 0) | -             (static_cast<uint8_t>(BBFreq) << 1) | -             (static_cast<uint8_t>(BrProb) << 2) | -             (static_cast<uint8_t>(MultiBBRange) << 3) | -             (static_cast<uint8_t>(OmitBBEntries) << 4) | -             (static_cast<uint8_t>(CallsiteEndOffsets) << 5) | -             (static_cast<uint8_t>(BBHash) << 6); +    uint16_t encode() const { +      return (static_cast<uint16_t>(FuncEntryCount) << 0) | +             (static_cast<uint16_t>(BBFreq) << 1) | +             (static_cast<uint16_t>(BrProb) << 2) | +             (static_cast<uint16_t>(MultiBBRange) << 3) | +             (static_cast<uint16_t>(OmitBBEntries) << 4) | +             (static_cast<uint16_t>(CallsiteEndOffsets) << 5) | +             (static_cast<uint16_t>(BBHash) << 6) | +             (static_cast<uint16_t>(PostLinkCfg) << 7);      }      // Decodes from minimum bit width representation and validates no      // unnecessary bits are used. 
-    static Expected<Features> decode(uint8_t Val) { +    static Expected<Features> decode(uint16_t Val) {        Features Feat{            static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)),            static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),            static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5)), -          static_cast<bool>(Val & (1 << 6))}; +          static_cast<bool>(Val & (1 << 6)), static_cast<bool>(Val & (1 << 7))};        if (Feat.encode() != Val)          return createStringError(              std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x", @@ -867,10 +869,11 @@ struct BBAddrMap {      bool operator==(const Features &Other) const {        return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange, -                      OmitBBEntries, CallsiteEndOffsets, BBHash) == +                      OmitBBEntries, CallsiteEndOffsets, BBHash, PostLinkCfg) ==               std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb,                        Other.MultiBBRange, Other.OmitBBEntries, -                      Other.CallsiteEndOffsets, Other.BBHash); +                      Other.CallsiteEndOffsets, Other.BBHash, +                      Other.PostLinkCfg);      }    }; @@ -1010,23 +1013,30 @@ struct PGOAnalysisMap {      /// probability associated with it.      struct SuccessorEntry {        /// Unique ID of this successor basic block. -      uint32_t ID; +      uint32_t ID = 0;        /// Branch Probability of the edge to this successor taken from MBPI.        BranchProbability Prob; +      /// Raw edge count from the post link profile (e.g., from bolt or +      /// propeller). 
+      uint64_t PostLinkFreq = 0;        bool operator==(const SuccessorEntry &Other) const { -        return std::tie(ID, Prob) == std::tie(Other.ID, Other.Prob); +        return std::tie(ID, Prob, PostLinkFreq) == +               std::tie(Other.ID, Other.Prob, Other.PostLinkFreq);        }      };      /// Block frequency taken from MBFI      BlockFrequency BlockFreq; +    /// Raw block count taken from the post link profile (e.g., from bolt or +    /// propeller). +    uint64_t PostLinkBlockFreq = 0;      /// List of successors of the current block      llvm::SmallVector<SuccessorEntry, 2> Successors;      bool operator==(const PGOBBEntry &Other) const { -      return std::tie(BlockFreq, Successors) == -             std::tie(Other.BlockFreq, Other.Successors); +      return std::tie(BlockFreq, PostLinkBlockFreq, Successors) == +             std::tie(Other.BlockFreq, Other.PostLinkBlockFreq, Other.Successors);      }    }; diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index a7c7c7c..a8236ca 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -166,7 +166,7 @@ struct BBAddrMapEntry {      std::optional<llvm::yaml::Hex64> Hash;    };    uint8_t Version; -  llvm::yaml::Hex8 Feature; +  llvm::yaml::Hex16 Feature;    struct BBRangeEntry {      llvm::yaml::Hex64 BaseAddress; @@ -203,8 +203,10 @@ struct PGOAnalysisMapEntry {      struct SuccessorEntry {        uint32_t ID;        llvm::yaml::Hex32 BrProb; +      std::optional<uint32_t> PostLinkBrFreq;      };      std::optional<uint64_t> BBFreq; +    std::optional<uint32_t> PostLinkBBFreq;      std::optional<std::vector<SuccessorEntry>> Successors;    };    std::optional<uint64_t> FuncEntryCount; diff --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h index 1e67926..15f1ec8 100644 --- a/llvm/include/llvm/Support/AutoConvert.h +++ b/llvm/include/llvm/Support/AutoConvert.h @@ -18,6 +18,7 @@  
#include <_Ccsid.h>  #endif  #ifdef __cplusplus +#include "llvm/ADT/Twine.h"  #include "llvm/Support/Error.h"  #include <system_error>  #endif /* __cplusplus */ @@ -47,12 +48,12 @@ namespace llvm {  std::error_code setzOSFileTag(int FD, int CCSID, bool Text);  /** \brief Get the tag ccsid for a file name or a file descriptor. */ -ErrorOr<__ccsid_t> getzOSFileTag(const char *FileName, const int FD = -1); +ErrorOr<__ccsid_t> getzOSFileTag(const Twine &FileName, const int FD = -1);  /** \brief Query the file tag to determine if it needs conversion to UTF-8   *  codepage.   */ -ErrorOr<bool> needzOSConversion(const char *FileName, const int FD = -1); +ErrorOr<bool> needzOSConversion(const Twine &FileName, const int FD = -1);  #endif /* __MVS__*/ @@ -87,7 +88,7 @@ inline std::error_code setFileTag(int FD, int CCSID, bool Text) {    return std::error_code();  } -inline ErrorOr<bool> needConversion(const char *FileName, const int FD = -1) { +inline ErrorOr<bool> needConversion(const Twine &FileName, const int FD = -1) {  #ifdef __MVS__    return needzOSConversion(FileName, FD);  #endif diff --git a/llvm/include/llvm/Support/FormattedStream.h b/llvm/include/llvm/Support/FormattedStream.h index 011a6ae..402cd3e 100644 --- a/llvm/include/llvm/Support/FormattedStream.h +++ b/llvm/include/llvm/Support/FormattedStream.h @@ -180,7 +180,8 @@ public:      return *this;    } -  raw_ostream &changeColor(enum Colors Color, bool Bold, bool BG) override { +  raw_ostream &changeColor(enum Colors Color, bool Bold = false, +                           bool BG = false) override {      if (colors_enabled()) {        DisableScanScope S(this);        raw_ostream::changeColor(Color, Bold, BG); diff --git a/llvm/include/llvm/Support/GenericLoopInfo.h b/llvm/include/llvm/Support/GenericLoopInfo.h index 2775a87..b6bb360 100644 --- a/llvm/include/llvm/Support/GenericLoopInfo.h +++ b/llvm/include/llvm/Support/GenericLoopInfo.h @@ -615,6 +615,17 @@ public:      return L ? 
L->getLoopDepth() : 0;    } +  /// \brief Find the innermost loop containing both given loops. +  /// +  /// \returns the innermost loop containing both \p A and \p B +  ///          or nullptr if there is no such loop. +  LoopT *getSmallestCommonLoop(LoopT *A, LoopT *B) const; +  /// \brief Find the innermost loop containing both given blocks. +  /// +  /// \returns the innermost loop containing both \p A and \p B +  ///          or nullptr if there is no such loop. +  LoopT *getSmallestCommonLoop(BlockT *A, BlockT *B) const; +    // True if the block is a loop header node    bool isLoopHeader(const BlockT *BB) const {      const LoopT *L = getLoopFor(BB); diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h index 6fc508b..5416780 100644 --- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h +++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h @@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {      if (BB == getHeader()) {        assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");      } else if (!OutsideLoopPreds.empty()) { -      // A non-header loop shouldn't be reachable from outside the loop, +      // A non-header loop block shouldn't be reachable from outside the loop,        // though it is permitted if the predecessor is not itself actually        // reachable.        BlockT *EntryBB = &BB->getParent()->front(); @@ -645,6 +645,36 @@ LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() const {    return PreOrderLoops;  } +template <class BlockT, class LoopT> +LoopT *LoopInfoBase<BlockT, LoopT>::getSmallestCommonLoop(LoopT *A, +                                                          LoopT *B) const { +  if (!A || !B) +    return nullptr; + +  // If loops A and B have different depths, replace the deeper one with its +  // parent loop until they have the same depth. 
+  while (A->getLoopDepth() > B->getLoopDepth()) +    A = A->getParentLoop(); +  while (B->getLoopDepth() > A->getLoopDepth()) +    B = B->getParentLoop(); + +  // Loops A and B are at same depth but may be disjoint, replace them with +  // parent loops until we find loop that contains both or we run out of +  // parent loops. +  while (A != B) { +    A = A->getParentLoop(); +    B = B->getParentLoop(); +  } + +  return A; +} + +template <class BlockT, class LoopT> +LoopT *LoopInfoBase<BlockT, LoopT>::getSmallestCommonLoop(BlockT *A, +                                                          BlockT *B) const { +  return getSmallestCommonLoop(getLoopFor(A), getLoopFor(B)); +} +  // Debugging  template <class BlockT, class LoopT>  void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const { diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h index 898b4ea..4e2262fb 100644 --- a/llvm/include/llvm/Support/LEB128.h +++ b/llvm/include/llvm/Support/LEB128.h @@ -29,8 +29,7 @@ inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS,      uint8_t Byte = Value & 0x7f;      // NOTE: this assumes that this signed shift is an arithmetic right shift.      Value >>= 7; -    More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || -              ((Value == -1) && ((Byte & 0x40) != 0)))); +    More = Value != ((Byte & 0x40) ? -1 : 0);      Count++;      if (More || Count < PadTo)        Byte |= 0x80; // Mark this byte to show that more bytes will follow. @@ -58,8 +57,7 @@ inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) {      uint8_t Byte = Value & 0x7f;      // NOTE: this assumes that this signed shift is an arithmetic right shift.      Value >>= 7; -    More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || -              ((Value == -1) && ((Byte & 0x40) != 0)))); +    More = Value != ((Byte & 0x40) ? 
-1 : 0);      Count++;      if (More || Count < PadTo)        Byte |= 0x80; // Mark this byte to show that more bytes will follow. diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 47d5d68..119695e 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1013,10 +1013,18 @@ def extract_vec_elt_combines : GICombineGroup<[  def funnel_shift_from_or_shift : GICombineRule<    (defs root:$root, build_fn_matchinfo:$info),    (match (wip_match_opcode G_OR):$root, -    [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]), +    [{ return Helper.matchOrShiftToFunnelShift(*${root}, false, ${info}); }]),    (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])  >; +def funnel_shift_from_or_shift_constants_are_legal : GICombineRule< +  (defs root:$root, build_fn_matchinfo:$info), +  (match (wip_match_opcode G_OR):$root, +    [{ return Helper.matchOrShiftToFunnelShift(*${root}, true, ${info}); }]), +  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }]) +>; + +  def funnel_shift_to_rotate : GICombineRule<    (defs root:$root),    (match (wip_match_opcode G_FSHL, G_FSHR):$root, diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index ced446d..9dcd4b5 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -26,8 +26,6 @@  namespace llvm { -LLVM_ABI extern cl::opt<bool> DebugInfoCorrelate; -  class Function;  class Instruction;  class Module; diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index e677cbf..49885b7 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -19,6 +19,7 @@  #include 
"llvm/ADT/ArrayRef.h"  #include "llvm/ADT/SetVector.h"  #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CycleInfo.h"  #include "llvm/IR/Dominators.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Printable.h" @@ -262,6 +263,34 @@ LLVM_ABI BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,                                 MemorySSAUpdater *MSSAU = nullptr,                                 const Twine &BBName = ""); +/// \brief Create a new intermediate target block for a callbr edge. +/// +/// Create a new basic block between a callbr instruction and one of its +/// successors. The new block replaces the original successor in the callbr +/// instruction and unconditionally branches to the original successor. This +/// is useful for normalizing control flow, e.g., when transforming +/// irreducible loops. +/// +/// \param CallBrBlock    block containing the callbr instruction +/// \param Succ           original successor block +/// \param SuccIdx        index of the original successor in the callbr +///                       instruction +/// \param DTU            optional \p DomTreeUpdater for updating the +///                       dominator tree +/// \param CI             optional \p CycleInfo for updating cycle membership +/// \param LI             optional \p LoopInfo for updating loop membership +/// \param UpdatedLI      optional output flag indicating if \p LoopInfo has +///                       been updated +/// +/// \returns newly created intermediate target block +/// +/// \note This function updates PHI nodes, dominator tree, loop info, and +/// cycle info as needed. +LLVM_ABI BasicBlock * +SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ, unsigned SuccIdx, +                DomTreeUpdater *DTU = nullptr, CycleInfo *CI = nullptr, +                LoopInfo *LI = nullptr, bool *UpdatedLI = nullptr); +  /// Sets the unwind edge of an instruction to a particular successor.  
LLVM_ABI void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ); diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h index 810fef2..17cde82 100644 --- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h @@ -15,10 +15,13 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/StringRef.h" +#include "llvm/IR/CycleInfo.h"  namespace llvm {  class BasicBlock; +class CallBrInst; +class LoopInfo;  class DomTreeUpdater;  /// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such @@ -104,7 +107,8 @@ struct ControlFlowHub {          : BB(BB), Succ0(Succ0), Succ1(Succ1) {}    }; -  void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) { +  void addBranch(BasicBlock *BB, BasicBlock *Succ0, +                 BasicBlock *Succ1 = nullptr) {      assert(BB);      assert(Succ0 || Succ1);      Branches.emplace_back(BB, Succ0, Succ1); | 
