diff options
Diffstat (limited to 'llvm/include/llvm')
52 files changed, 740 insertions, 332 deletions
diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h index 6100aa9..bbdbd9b 100644 --- a/llvm/include/llvm/ADT/CombinationGenerator.h +++ b/llvm/include/llvm/ADT/CombinationGenerator.h @@ -118,10 +118,9 @@ public: : VariablesChoices(VariablesChoices_) { #ifndef NDEBUG assert(!VariablesChoices.empty() && "There should be some variables."); - llvm::for_each(VariablesChoices, [](ArrayRef<choice_type> VariableChoices) { + for (ArrayRef<choice_type> VariableChoices : VariablesChoices) assert(!VariableChoices.empty() && "There must always be some choice, at least a placeholder one."); - }); #endif } diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h index b1009f8..1a2331c 100644 --- a/llvm/include/llvm/ADT/EquivalenceClasses.h +++ b/llvm/include/llvm/ADT/EquivalenceClasses.h @@ -218,12 +218,12 @@ public: /// insert - Insert a new value into the union/find set, ignoring the request /// if the value already exists. const ECValue &insert(const ElemTy &Data) { - auto I = TheMapping.insert({Data, nullptr}); - if (!I.second) - return *I.first->second; + auto [I, Inserted] = TheMapping.try_emplace(Data); + if (!Inserted) + return *I->second; auto *ECV = new (ECValueAllocator) ECValue(Data); - I.first->second = ECV; + I->second = ECV; Members.push_back(ECV); return *ECV; } diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h index 7bd2c87..81b9a68 100644 --- a/llvm/include/llvm/ADT/STLForwardCompat.h +++ b/llvm/include/llvm/ADT/STLForwardCompat.h @@ -55,21 +55,13 @@ using type_identity_t // NOLINT(readability-identifier-naming) // TODO: Remove this in favor of std::optional<T>::transform once we switch to // C++23. -template <typename T, typename Function> -auto transformOptional(const std::optional<T> &O, const Function &F) - -> std::optional<decltype(F(*O))> { - if (O) - return F(*O); - return std::nullopt; -} - -// TODO: Remove this in favor of std::optional<T>::transform once we switch to -// C++23. -template <typename T, typename Function> -auto transformOptional(std::optional<T> &&O, const Function &F) - -> std::optional<decltype(F(*std::move(O)))> { - if (O) - return F(*std::move(O)); +template <typename Optional, typename Function, + typename Value = typename llvm::remove_cvref_t<Optional>::value_type> +std::optional<std::invoke_result_t<Function, Value>> +transformOptional(Optional &&O, Function &&F) { + if (O) { + return F(*std::forward<Optional>(O)); + } return std::nullopt; } diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h index c089a07..575b3c9 100644 --- a/llvm/include/llvm/ADT/StringTable.h +++ b/llvm/include/llvm/ADT/StringTable.h @@ -118,6 +118,13 @@ public: constexpr Iterator(const Iterator &RHS) = default; constexpr Iterator(Iterator &&RHS) = default; + Iterator &operator=(const Iterator &RHS) { + Table = RHS.Table; + O = RHS.O; + S = RHS.S; + return *this; + } + bool operator==(const Iterator &RHS) const { assert(Table == RHS.Table && "Compared iterators for unrelated tables!"); return O == RHS.O; @@ -132,6 +139,8 @@ public: O = O.value() + (*Table)[O].size() + 1; return *this; } + + Offset offset() const { return O; } }; constexpr Iterator begin() const { return Iterator(*this, 0); } diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 3d7edf0..d87457c 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -170,6 +170,10 @@ public: unsigned getDimension() const; size_t size() const; + static size_t expectedSize() { + return MaxOpcodes + MaxTypeIDs + MaxOperandKinds; + } + /// Helper function to get vocabulary key for a given Opcode static StringRef getVocabKeyForOpcode(unsigned Opcode); @@ -182,6 +186,11 @@ public: /// Helper function to classify an operand into OperandKind static OperandKind getOperandKind(const Value *Op); + /// Helpers to return the IDs of a given Opcode, TypeID, or OperandKind + static unsigned getNumericID(unsigned Opcode); + static unsigned getNumericID(Type::TypeID TypeID); + static unsigned getNumericID(const Value *Op); + /// Accessors to get the embedding for a given entity. const ir2vec::Embedding &operator[](unsigned Opcode) const; const ir2vec::Embedding &operator[](Type::TypeID TypeId) const; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index b985292..1dc7320 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -47,6 +47,8 @@ enum class RecurKind { FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). FMax, ///< FP max implemented in terms of select(cmp()). + FMinNum, ///< FP min with llvm.minnum semantics including NaNs. + FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs. FMinimum, ///< FP min with llvm.minimum semantics FMaximum, ///< FP max with llvm.maximum semantics FMinimumNum, ///< FP min with llvm.minimumnum semantics @@ -250,6 +252,7 @@ public: /// Returns true if the recurrence kind is a floating-point min/max kind. static bool isFPMinMaxRecurrenceKind(RecurKind Kind) { return Kind == RecurKind::FMin || Kind == RecurKind::FMax || + Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum || Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum || Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum; } diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index b042a71..571caf9 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -102,6 +102,12 @@ private: // The maximum size of a cold allocation context, from the profile summary. uint64_t MaxColdSize; + // Tracks whether we have built the Trie from existing MD_memprof metadata. We + // apply different heuristics for determining whether to discard non-cold + // contexts when rebuilding as we have lost information available during the + // original profile match. + bool BuiltFromExistingMetadata = false; + void deleteTrieNode(CallStackTrieNode *Node) { if (!Node) return; diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index af1e0d7..9a2773c 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -24,6 +24,7 @@ namespace llvm { class TargetLibraryInfo; +class IntrinsicInst; /// The Vector Function Database. /// @@ -188,6 +189,10 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID); /// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics. LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID); +/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each +/// factor. +LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI); + /// Given a vector and an element number, see if the scalar value is /// already around as a register, for example if it were inserted then extracted /// from the vector. diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index c7e4bdf..a2311d2 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -181,6 +181,7 @@ enum Kind { kw_amdgpu_cs_chain_preserve, kw_amdgpu_kernel, kw_amdgpu_gfx, + kw_amdgpu_gfx_whole_wave, kw_tailcc, kw_m68k_rtdcc, kw_graalcc, diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 6bf2e17..e4f82ad 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1159,6 +1159,7 @@ enum : unsigned { SHT_LLVM_OFFLOADING = 0x6fff4c0b, // LLVM device offloading data. SHT_LLVM_LTO = 0x6fff4c0c, // .llvm.lto for fat LTO. SHT_LLVM_JT_SIZES = 0x6fff4c0d, // LLVM jump tables sizes. + SHT_LLVM_CFI_JUMP_TABLE = 0x6fff4c0e, // LLVM CFI jump table. // Android's experimental support for SHT_RELR sections. // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512 SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets. diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h index 16d3b16..98dbe38 100644 --- a/llvm/include/llvm/BinaryFormat/SFrame.h +++ b/llvm/include/llvm/BinaryFormat/SFrame.h @@ -15,33 +15,36 @@ #ifndef LLVM_BINARYFORMAT_SFRAME_H #define LLVM_BINARYFORMAT_SFRAME_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Endian.h" -namespace llvm::sframe { +namespace llvm { + +template <typename T> struct EnumEntry; + +namespace sframe { LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); constexpr uint16_t Magic = 0xdee2; enum class Version : uint8_t { - V1 = 1, - V2 = 2, +#define HANDLE_SFRAME_VERSION(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; enum class Flags : uint8_t { - FDESorted = 0x01, - FramePointer = 0x02, - FDEFuncStartPCRel = 0x04, +#define HANDLE_SFRAME_FLAG(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" V2AllFlags = FDESorted | FramePointer | FDEFuncStartPCRel, LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xff), }; enum class ABI : uint8_t { - AArch64EndianBig = 1, - AArch64EndianLittle = 2, - AMD64EndianLittle = 3, +#define HANDLE_SFRAME_ABI(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; /// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info. @@ -160,6 +163,11 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>; template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>; template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>; -} // namespace llvm::sframe +ArrayRef<EnumEntry<Version>> getVersions(); +ArrayRef<EnumEntry<Flags>> getFlags(); +ArrayRef<EnumEntry<ABI>> getABIs(); + +} // namespace sframe +} // namespace llvm #endif // LLVM_BINARYFORMAT_SFRAME_H diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def new file mode 100644 index 0000000..643b15f --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def @@ -0,0 +1,39 @@ +//===- SFrameConstants.def --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \ + defined(HANDLE_SFRAME_ABI)) +#error "Missing HANDLE_SFRAME definition" +#endif + +#ifndef HANDLE_SFRAME_VERSION +#define HANDLE_SFRAME_VERSION(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_FLAG +#define HANDLE_SFRAME_FLAG(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_ABI +#define HANDLE_SFRAME_ABI(CODE, NAME) +#endif + +HANDLE_SFRAME_VERSION(0x01, V1) +HANDLE_SFRAME_VERSION(0x02, V2) + +HANDLE_SFRAME_FLAG(0x01, FDESorted) +HANDLE_SFRAME_FLAG(0x02, FramePointer) +HANDLE_SFRAME_FLAG(0x04, FDEFuncStartPCRel) + +HANDLE_SFRAME_ABI(0x01, AArch64EndianBig) +HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle) +HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle) + +#undef HANDLE_SFRAME_VERSION +#undef HANDLE_SFRAME_FLAG +#undef HANDLE_SFRAME_ABI diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 31f1197b..da82904 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -700,18 +700,19 @@ public: /// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant, /// return an expression that implements it by multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". - MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const; + MachineInstr *buildUDivOrURemUsingMul(MachineInstr &MI) const; /// Combine G_UDIV or G_UREM by constant into a multiply by magic constant. - bool matchUDivorURemByConst(MachineInstr &MI) const; - void applyUDivorURemByConst(MachineInstr &MI) const; - - /// Given an G_SDIV \p MI expressing a signed divide by constant, return an - /// expression that implements it by multiplying by a magic number. - /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". - MachineInstr *buildSDivUsingMul(MachineInstr &MI) const; - /// Combine G_SDIV by constant into a multiply by magic constant. - bool matchSDivByConst(MachineInstr &MI) const; - void applySDivByConst(MachineInstr &MI) const; + bool matchUDivOrURemByConst(MachineInstr &MI) const; + void applyUDivOrURemByConst(MachineInstr &MI) const; + + /// Given an G_SDIV \p MI or G_SREM \p MI expressing a signed divide by + /// constant, return an expression that implements it by multiplying by a + /// magic number. Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's + /// Guide". + MachineInstr *buildSDivOrSRemUsingMul(MachineInstr &MI) const; + /// Combine G_SDIV or G_SREM by constant into a multiply by magic constant. + bool matchSDivOrSRemByConst(MachineInstr &MI) const; + void applySDivOrSRemByConst(MachineInstr &MI) const; /// Given an G_SDIV \p MI expressing a signed divided by a pow2 constant, /// return expressions that implements it by shifting. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h index da73238..490d1a3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h @@ -103,6 +103,20 @@ public: /// \return The known alignment for the pointer-like value \p R. Align computeKnownAlignment(Register R, unsigned Depth = 0); + /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts + /// that are all less than the element bit-width of the shift node, return the + /// valid constant range. + std::optional<ConstantRange> + getValidShiftAmountRange(Register R, const APInt &DemandedElts, + unsigned Depth); + + /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts + /// that are all less than the element bit-width of the shift node, return the + /// minimum possible value. + std::optional<uint64_t> getValidMinimumShiftAmount(Register R, + const APInt &DemandedElts, + unsigned Depth = 0); + /// Determine which floating-point classes are valid for \p V, and return them /// in KnownFPClass bit sets. /// diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index aef9190..7940752 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -39,8 +39,8 @@ class MachineBlockFrequencyInfo { public: LLVM_ABI MachineBlockFrequencyInfo(); // Legacy pass manager only. LLVM_ABI explicit MachineBlockFrequencyInfo( - MachineFunction &F, MachineBranchProbabilityInfo &MBPI, - MachineLoopInfo &MLI); + const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI, + const MachineLoopInfo &MLI); LLVM_ABI MachineBlockFrequencyInfo(MachineBlockFrequencyInfo &&); LLVM_ABI ~MachineBlockFrequencyInfo(); diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h index 98a60c9..1d954cf 100644 --- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -46,6 +46,11 @@ public: LLVM_ABI Result run(Function &F, FunctionAnalysisManager &FAM); }; +class FreeMachineFunctionPass : public PassInfoMixin<FreeMachineFunctionPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + } // namespace llvm #endif // LLVM_CODEGEN_MachineFunctionAnalysis diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 72594c7..1a548a5 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3201,11 +3201,15 @@ public: /// Lower an interleaved load to target specific intrinsics. Return /// true on success. /// - /// \p LI is the vector load instruction. + /// \p Load is the vector load instruction. Can be either a plain load + /// instruction or a vp.load intrinsic. + /// \p Mask is a per-segment (i.e. number of lanes equal to that of one + /// component being interwoven) mask. Can be nullptr, in which case the + /// result is uncondiitional. /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. /// \p Indices is the corresponding indices for each shufflevector. /// \p Factor is the interleave factor. - virtual bool lowerInterleavedLoad(LoadInst *LI, + virtual bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles, ArrayRef<unsigned> Indices, unsigned Factor) const { @@ -3223,17 +3227,6 @@ public: return false; } - /// Lower an interleaved load to target specific intrinsics. Return - /// true on success. - /// - /// \p Load is a vp.load instruction. - /// \p Mask is a mask value - /// \p DeinterleaveRes is a list of deinterleaved results. - virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask, - ArrayRef<Value *> DeinterleaveRes) const { - return false; - } - /// Lower an interleaved store to target specific intrinsics. Return /// true on success. /// @@ -3251,10 +3244,9 @@ public: /// /// \p Load is the accompanying load instruction. Can be either a plain load /// instruction or a vp.load intrinsic. - /// \p DeinterleaveValues contains the deinterleaved values. - virtual bool - lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, - ArrayRef<Value *> DeinterleaveValues) const { + /// \p DI represents the deinterleaveN intrinsic. + virtual bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, + IntrinsicInst *DI) const { return false; } @@ -3262,10 +3254,14 @@ public: /// Return true on success. Currently only supports /// llvm.vector.interleave{2,3,5,7} /// - /// \p SI is the accompanying store instruction + /// \p Store is the accompanying store instruction. Can be either a plain + /// store or a vp.store intrinsic. + /// \p Mask is a per-segment (i.e. number of lanes equal to that of one + /// component being interwoven) mask. Can be nullptr, in which case the + /// result is uncondiitional. /// \p InterleaveValues contains the interleaved values. virtual bool - lowerInterleaveIntrinsicToStore(StoreInst *SI, + lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const { return false; } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h index d44c333..9d0a554 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h @@ -51,13 +51,17 @@ static constexpr inline bool canHaveIterator(Clause C) { // Can clause C create a private copy of a variable. static constexpr inline bool isPrivatizingClause(Clause C) { switch (C) { + case OMPC_detach: case OMPC_firstprivate: + // TODO case OMPC_induction: case OMPC_in_reduction: + case OMPC_is_device_ptr: case OMPC_lastprivate: case OMPC_linear: case OMPC_private: case OMPC_reduction: case OMPC_task_reduction: + case OMPC_use_device_ptr: return true; default: return false; diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index d68491e..ef761eb 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -284,6 +284,9 @@ namespace CallingConv { RISCV_VLSCall_32768 = 122, RISCV_VLSCall_65536 = 123, + // Calling convention for AMDGPU whole wave functions. + AMDGPU_Gfx_WholeWave = 124, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; @@ -294,8 +297,13 @@ namespace CallingConv { /// directly or indirectly via a call-like instruction. constexpr bool isCallableCC(CallingConv::ID CC) { switch (CC) { + // Called with special intrinsics: + // llvm.amdgcn.cs.chain case CallingConv::AMDGPU_CS_Chain: case CallingConv::AMDGPU_CS_ChainPreserve: + // llvm.amdgcn.call.whole.wave + case CallingConv::AMDGPU_Gfx_WholeWave: + // Hardware entry points: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_GS: diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 77cee87..c529a86 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -39,30 +39,26 @@ class DbgVariableRecord; class Instruction; class Module; -/// Finds dbg.declare intrinsics declaring local variables as living in the +/// Finds dbg.declare records declaring local variables as living in the /// memory that 'V' points to. -LLVM_ABI TinyPtrVector<DbgDeclareInst *> findDbgDeclares(Value *V); -/// As above, for DVRDeclares. LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRDeclares(Value *V); /// As above, for DVRValues. LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRValues(Value *V); -/// Finds the llvm.dbg.value intrinsics describing a value. -LLVM_ABI void findDbgValues( - SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V, - SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr); - -/// Finds the debug info intrinsics describing a value. -LLVM_ABI void findDbgUsers( - SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V, - SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr); +/// Finds the debug info records describing a value. +LLVM_ABI void +findDbgUsers(Value *V, + SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords); +/// Finds the dbg.values describing a value. +LLVM_ABI void +findDbgValues(Value *V, + SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords); /// Find subprogram that is enclosing this scope. LLVM_ABI DISubprogram *getDISubprogram(const MDNode *Scope); /// Produce a DebugLoc to use for each dbg.declare that is promoted to a /// dbg.value. -LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII); LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableRecord *DVR); /// Strip debug info in the module if it exists. @@ -115,8 +111,7 @@ public: LLVM_ABI void processVariable(DILocalVariable *DVI); /// Process debug info location. LLVM_ABI void processLocation(const Module &M, const DILocation *Loc); - /// Process a DbgRecord (e.g, treat a DbgVariableRecord like a - /// DbgVariableIntrinsic). + /// Process a DbgRecord. LLVM_ABI void processDbgRecord(const Module &M, const DbgRecord &DR); /// Process subprogram. @@ -193,13 +188,6 @@ using AssignmentInstRange = /// Iterators invalidated by adding or removing DIAssignID metadata to/from any /// instruction (including by deleting or cloning instructions). LLVM_ABI AssignmentInstRange getAssignmentInsts(DIAssignID *ID); -/// Return a range of instructions (typically just one) that perform the -/// assignment that \p DAI encodes. -/// Iterators invalidated by adding or removing DIAssignID metadata to/from any -/// instruction (including by deleting or cloning instructions). -inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) { - return getAssignmentInsts(DAI->getAssignID()); -} inline AssignmentInstRange getAssignmentInsts(const DbgVariableRecord *DVR) { assert(DVR->isDbgAssign() && @@ -290,8 +278,6 @@ struct VarRecord { DILocalVariable *Var; DILocation *DL; - VarRecord(DbgVariableIntrinsic *DVI) - : Var(DVI->getVariable()), DL(getDebugValueLoc(DVI)) {} VarRecord(DbgVariableRecord *DVR) : Var(DVR->getVariable()), DL(getDebugValueLoc(DVR)) {} VarRecord(DILocalVariable *Var, DILocation *DL) : Var(Var), DL(DL) {} diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 9345f95..f1f0c189 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -66,7 +66,6 @@ namespace dwarf { enum Tag : uint16_t; } -class DbgVariableIntrinsic; class DbgVariableRecord; LLVM_ABI extern cl::opt<bool> EnableFSDiscriminator; @@ -4613,7 +4612,6 @@ class DebugVariable { LLVM_ABI static const FragmentInfo DefaultFragment; public: - LLVM_ABI DebugVariable(const DbgVariableIntrinsic *DII); LLVM_ABI DebugVariable(const DbgVariableRecord *DVR); DebugVariable(const DILocalVariable *Var, @@ -4681,7 +4679,6 @@ template <> struct DenseMapInfo<DebugVariable> { /// information). class DebugVariableAggregate : public DebugVariable { public: - LLVM_ABI DebugVariableAggregate(const DbgVariableIntrinsic *DVI); DebugVariableAggregate(const DebugVariable &V) : DebugVariable(V.getVariable(), std::nullopt, V.getInlinedAt()) {} }; diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index df572e8..90276ea 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -53,3 +53,4 @@ LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39) LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40) LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41) +LLVM_FIXED_MD_KIND(MD_callee_type, "callee_type", 42) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index d8fda0e..8bfa345 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3572,6 +3572,12 @@ def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">, [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + +def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. @@ -3711,6 +3717,20 @@ class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> : IntrWillReturn, IntrNoCallback, IntrNoFree] >; +class AMDGPUWmmaIntrinsicModsC_MatrixFMT : + Intrinsic< + [llvm_anyfloat_ty], // %D + [ + llvm_i32_ty, // matrix_a_fmt + llvm_anyint_ty, // %A + llvm_i32_ty, // matrix_b_fmt + llvm_anyint_ty, // %B + llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs) + LLVMMatchType<0>, // %C + ], + [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree] +>; + defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = { def int_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>; def int_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>; @@ -3735,6 +3755,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>; def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>; def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT; def int_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>; } diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 0375f29..967d166 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -331,6 +331,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> { !eq(gf,"m8n16:x2") : !listsplat(llvm_i32_ty, 2), !eq(gf,"m8n16:x4") : !listsplat(llvm_i32_ty, 4), + // stmatrix b8 -> s32 @ m16n8 + !eq(gf,"m16n8:x1") : !listsplat(llvm_i32_ty, 1), + !eq(gf,"m16n8:x2") : !listsplat(llvm_i32_ty, 2), + !eq(gf,"m16n8:x4") : !listsplat(llvm_i32_ty, 4), + ); } @@ -403,6 +408,17 @@ class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> { !subst("llvm.", "int_", intr)); } +class STMATRIX_NAME<WMMA_REGS Frag, int Trans> { + string intr = "llvm.nvvm.stmatrix.sync.aligned" + # "." # Frag.geom + # "." # Frag.frag + # !if(Trans, ".trans", "") + # "." # Frag.ptx_elt_type + ; + string record = !subst(".", "_", + !subst("llvm.", "int_", intr)); +} + // Generates list of 4-tuples of WMMA_REGS representing a valid MMA op. // Geom: list of supported geometries. // TypeN: PTX type of the corresponding fragment's element. @@ -443,6 +459,16 @@ class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> { list<string> ops = !foreach(x, ret, x.gft); } +class STMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> { + list<WMMA_REGS> ret = + !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1, + !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2, + !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3, + [WMMA_REGS<geom, frag, type>])))))); + // Debugging aid for readable representation of the list above. + list<string> ops = !foreach(x, ret, x.gft); +} + // Creates list of valid combinations of fragments. This is the main list that // drives generation of corresponding intrinsics and instructions. class NVVM_MMA_OPS { @@ -537,9 +563,18 @@ class NVVM_MMA_OPS { list<WMMA_REGS> ldmatrix_geom_m8n16_ops = LDMATRIX_OPS< ["m8n16"], ["x1", "x2", "x4"], ["b8x16.b6x16_p32", "b8x16.b4x16_p64"]>.ret; + list<WMMA_REGS> stmatrix_b16_ops = STMATRIX_OPS< + ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret; + + list<WMMA_REGS> stmatrix_b8_ops = STMATRIX_OPS< + ["m16n8"], ["x1", "x2", "x4"], ["b8"]>.ret; + list<WMMA_REGS> all_ldmatrix_ops = !listconcat(ldmatrix_b16_ops, ldmatrix_geom_m16n16_ops, ldmatrix_geom_m8n16_ops); + + list<WMMA_REGS> all_stmatrix_ops = !listconcat(stmatrix_b16_ops, + stmatrix_b8_ops); } def NVVM_MMA_OPS : NVVM_MMA_OPS; @@ -680,6 +715,19 @@ class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> { ); } +// Returns true if the fragment is valid for stmatrix ops is supported; +// false otherwise. +class NVVM_STMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> { + string g = frag.geom; + string t = frag.ptx_elt_type; + + bit ret = !cond( + !and(!eq(g, "m8n8"), !eq(t, "b16")): true, + !and(!eq(g, "m16n8"), !eq(t, "b8"), !eq(trans, 1)): true, + true: false + ); +} + class SHFL_INFO<bit sync, string mode, string type, bit return_pred> { string Suffix = !if(sync, "sync_", "") # mode # "_" @@ -1969,6 +2017,23 @@ foreach transposed = [0, 1] in { } } +// STMATRIX +class NVVM_STMATRIX<WMMA_REGS Frag, int Transposed> + : Intrinsic<[], + !listconcat([llvm_anyptr_ty], Frag.regs), + [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, + WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>], + STMATRIX_NAME<Frag, Transposed>.intr>; + +foreach transposed = [0, 1] in { + foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in { + if NVVM_STMATRIX_SUPPORTED<frag, transposed>.ret then { + def STMATRIX_NAME<frag, transposed>.record + : NVVM_STMATRIX<frag, transposed>; + } + } +} + // MAPA let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in { def int_nvvm_mapa @@ -2024,9 +2089,7 @@ foreach dim = 1...5 in { tensor_dim_args, // actual tensor dims [llvm_i64_ty]), // cache_hint [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, - ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, - NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>; + [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>; // Intrinsics for TMA Copy with reduction foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in @@ -2037,18 +2100,31 @@ foreach dim = 1...5 in { tensor_dim_args, // actual tensor dims [llvm_i64_ty]), // cache_hint [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, - NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>; + [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>; } } +// TMA S2G tile::scatter4 +def int_nvvm_cp_async_bulk_tensor_s2g_tile_scatter4_2d + : DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_shared_ptr_ty, // src_smem_ptr + llvm_ptr_ty], // tensormap_ptr + !listsplat(llvm_i32_ty, 5), // dims + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>; + // TMA Tensor Copy Intrinsics: G2S -> From Global to Shared memory variants foreach dim = 1...5 in { defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim); - foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col", "im2col_w", "im2col_w_128"], ["tile"]) in { defvar is_im2col = !eq(mode, "im2col"); - defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0); + defvar is_im2colw = !or(!eq(mode, "im2col_w"), !eq(mode, "im2col_w_128")); + + // For im2col_w/w128 modes, the num_offsets is always 2. + // For im2col mode, the num_offsets is (dim - 2). + defvar num_im2col_offsets = !if(is_im2colw, 2, !if(is_im2col, !add(dim, -2), 0)); defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets); defvar g2s_params = !listconcat( @@ -2079,11 +2155,60 @@ foreach dim = 1...5 in { im2col_offsets_args, // im2col offsets [llvm_i64_ty]), // cache_hint [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, - ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>; + [IntrConvergent, ReadOnly<ArgIndex<0>>]>; + + def int_nvvm_cp_async_bulk_tensor_g2s_cta_ # mode # _ # dim # d : + DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_shared_ptr_ty, // dst_ptr + llvm_shared_ptr_ty, // mbarrier_ptr + llvm_ptr_ty], // tensormap_ptr + tensor_dim_args, // actual tensor dims + im2col_offsets_args, // im2col offsets + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>; } } +// TMA copy for tile::gather4 +def int_nvvm_cp_async_bulk_tensor_g2s_tile_gather4_2d + : DefaultAttrsIntrinsicFlags<[], + !listconcat( + [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr + llvm_shared_ptr_ty, // mbarrier_ptr + llvm_ptr_ty], // tensormap_ptr + !listsplat(llvm_i32_ty, 5), // co-ordinates + [llvm_i16_ty, // cta_mask + llvm_i64_ty]), // cache_hint + [llvm_i1_ty, // Flag for cta_mask + llvm_i1_ty, // Flag for cache_hint + llvm_i32_ty], // Flag for cta_group + [IntrConvergent, + WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>, + // Allowed values for cta_group are {0,1,2} i.e [0, 3). + Range<ArgIndex<12>, 0, 3>]>; + +def int_nvvm_cp_async_bulk_tensor_g2s_cta_tile_gather4_2d + : DefaultAttrsIntrinsicFlags<[], + !listconcat( + [llvm_shared_ptr_ty, // dst_shared_ptr + llvm_shared_ptr_ty, // mbarrier_ptr + llvm_ptr_ty], // tensormap_ptr + !listsplat(llvm_i32_ty, 5), // co-ordinates + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, + WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>; + +// TMA prefetch for tile::gather4 +def int_nvvm_cp_async_bulk_tensor_prefetch_tile_gather4_2d + : DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_ptr_ty], // tensormap_ptr + !listsplat(llvm_i32_ty, 5), // co-ordinates + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, ReadOnly<ArgIndex<0>>]>; + // Intrinsics for Prefetch and Prefetchu let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>] in { foreach level = ["L1", "L2"] in { diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 35c9cd6..b5f0cdf 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -90,7 +90,12 @@ let TargetPrefix = "spv" in { def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; - def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; + def int_spv_refract + : DefaultAttrsIntrinsic<[LLVMMatchType<0>], + [llvm_anyfloat_ty, LLVMMatchType<0>, + llvm_anyfloat_ty], + [IntrNoMem]>; +def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 2de26c0..af252aa 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1255,6 +1255,13 @@ public: bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); } bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; } + /// Check if this is a valid generalized type metadata node. + bool hasGeneralizedMDString() { + if (getNumOperands() < 2 || !isa<MDString>(getOperand(1))) + return false; + return cast<MDString>(getOperand(1))->getString().ends_with(".generalized"); + } + unsigned getNumTemporaryUses() const { assert(isTemporary() && "Only for temporaries"); return Context.getReplaceableUses()->getNumUses(); @@ -1467,6 +1474,8 @@ public: const Instruction *BInstr); LLVM_ABI static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B); LLVM_ABI static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B); + LLVM_ABI static MDNode *getMergedCalleeTypeMetadata(const MDNode *A, + const MDNode *B); }; /// Tuple of metadata. diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index 737610b..0fd5de3 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic"); - return false; } inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) { @@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) { } llvm_unreachable( "Checking invalid f2i/d2i intrinsic for signed int conversion"); - return false; } inline APFloat::roundingMode @@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) { return APFloat::rmTowardZero; } llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic"); - return APFloat::roundingMode::Invalid; } inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) { @@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic"); - return false; } inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) { @@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic"); - return false; } inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) { @@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic"); - return false; +} + +inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_ceil_ftz_f: + case Intrinsic::nvvm_fabs_ftz: + case Intrinsic::nvvm_floor_ftz_f: + case Intrinsic::nvvm_round_ftz_f: + case Intrinsic::nvvm_saturate_ftz_f: + case Intrinsic::nvvm_sqrt_rn_ftz_f: + return true; + case Intrinsic::nvvm_ceil_f: + case Intrinsic::nvvm_ceil_d: + case Intrinsic::nvvm_fabs: + case Intrinsic::nvvm_floor_f: + case Intrinsic::nvvm_floor_d: + case Intrinsic::nvvm_round_f: + case Intrinsic::nvvm_round_d: + case Intrinsic::nvvm_saturate_d: + case Intrinsic::nvvm_saturate_f: + case Intrinsic::nvvm_sqrt_f: + case Intrinsic::nvvm_sqrt_rn_d: + case Intrinsic::nvvm_sqrt_rn_f: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid unary intrinsic"); +} + +inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_rcp_rm_ftz_f: + case Intrinsic::nvvm_rcp_rn_ftz_f: + case Intrinsic::nvvm_rcp_rp_ftz_f: + case Intrinsic::nvvm_rcp_rz_ftz_f: + return true; + case Intrinsic::nvvm_rcp_rm_d: + case Intrinsic::nvvm_rcp_rm_f: + case Intrinsic::nvvm_rcp_rn_d: + case Intrinsic::nvvm_rcp_rn_f: + case Intrinsic::nvvm_rcp_rp_d: + case Intrinsic::nvvm_rcp_rp_f: + case Intrinsic::nvvm_rcp_rz_d: + case Intrinsic::nvvm_rcp_rz_f: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic"); +} + +inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_rcp_rm_f: + case Intrinsic::nvvm_rcp_rm_d: + case Intrinsic::nvvm_rcp_rm_ftz_f: + return APFloat::rmTowardNegative; + + case Intrinsic::nvvm_rcp_rn_f: + case Intrinsic::nvvm_rcp_rn_d: + case Intrinsic::nvvm_rcp_rn_ftz_f: + return APFloat::rmNearestTiesToEven; + + case Intrinsic::nvvm_rcp_rp_f: + case Intrinsic::nvvm_rcp_rp_d: + case Intrinsic::nvvm_rcp_rp_ftz_f: + return APFloat::rmTowardPositive; + + case Intrinsic::nvvm_rcp_rz_f: + case Intrinsic::nvvm_rcp_rz_d: + case Intrinsic::nvvm_rcp_rz_ftz_f: + return APFloat::rmTowardZero; + } + llvm_unreachable("Checking rounding mode for invalid rcp intrinsic"); +} + +inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) { + if (ShouldFTZ) + return DenormalMode::getPreserveSign(); + return DenormalMode::getIEEE(); } } // namespace nvvm diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h index ea3c1de..d813ae9 100644 --- a/llvm/include/llvm/IR/OptBisect.h +++ b/llvm/include/llvm/IR/OptBisect.h @@ -15,6 +15,7 @@ #define LLVM_IR_OPTBISECT_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/Compiler.h" #include <limits> @@ -82,8 +83,38 @@ private: mutable int LastBisectNum = 0; }; -/// Singleton instance of the OptBisect class, so multiple pass managers don't -/// need to coordinate their uses of OptBisect. +/// This class implements a mechanism to disable passes and individual +/// optimizations at compile time based on a command line option +/// (-opt-disable) in order to study how single transformations, or +/// combinations thereof, affect the IR. +class LLVM_ABI OptDisable : public OptPassGate { +public: + /// Checks the pass name to determine if the specified pass should run. + /// + /// It returns true if the pass should run, i.e. if its name is was + /// not provided via command line. + /// If -opt-disable-enable-verbosity is given, the method prints the + /// name of the pass, and whether or not the pass will be executed. + /// + /// Most passes should not call this routine directly. Instead, it is called + /// through helper routines provided by the base classes of the pass. For + /// instance, function passes should call FunctionPass::skipFunction(). + bool shouldRunPass(StringRef PassName, + StringRef IRDescription) const override; + + /// Parses the command line argument to extract the names of the passes + /// to be disabled. Multiple pass names can be provided with comma separation. + void setDisabled(StringRef Pass); + + /// isEnabled() should return true before calling shouldRunPass(). + bool isEnabled() const override { return !DisabledPasses.empty(); } + +private: + StringSet<> DisabledPasses = {}; +}; + +/// Singleton instance of the OptPassGate class, so multiple pass managers don't +/// need to coordinate their uses of OptBisect and OptDisable. LLVM_ABI OptPassGate &getGlobalPassGate(); } // end namespace llvm diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 4f44ae5..ea8226c 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -491,6 +491,22 @@ public: /// invalidate them, unless they are preserved by the PreservedAnalyses set. void invalidate(IRUnitT &IR, const PreservedAnalyses &PA); + /// Directly clear a cached analysis for an IR unit. + /// + /// Using invalidate() over this is preferred unless you are really + /// sure you want to *only* clear this analysis without asking if it is + /// invalid. + template <typename AnalysisT> void clearAnalysis(IRUnitT &IR) { + AnalysisResultListT &ResultsList = AnalysisResultLists[&IR]; + AnalysisKey *ID = AnalysisT::ID(); + + auto I = + llvm::find_if(ResultsList, [&ID](auto &E) { return E.first == ID; }); + assert(I != ResultsList.end() && "Analysis must be available"); + ResultsList.erase(I); + AnalysisResults.erase({ID, &IR}); + } + private: /// Look up a registered analysis pass. PassConceptT &lookUpPass(AnalysisKey *ID) { diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 8058c8a..89ad4e5 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -132,6 +132,10 @@ struct RuntimeLibcallsInfo { return ImplToLibcall[Impl]; } + /// Check if this is valid libcall for the current module, otherwise + /// RTLIB::Unsupported. + RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const; + private: static const RTLIB::LibcallImpl DefaultLibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1]; @@ -156,6 +160,14 @@ private: /// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind. LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls]; + /// Check if a function name is a recognized runtime call of any kind. This + /// does not consider if this call is available for any current compilation, + /// just that it is a known call somewhere. This returns the set of all + /// LibcallImpls which match the name; multiple implementations with the same + /// name may exist but differ in interpretation based on the target context. + LLVM_ABI static iterator_range<ArrayRef<uint16_t>::const_iterator> + getRecognizedLibcallImpls(StringRef FuncName); + static bool darwinHasSinCosStret(const Triple &TT) { if (!TT.isOSDarwin()) return false; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 11926d4..f0297cd 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -2129,7 +2129,7 @@ defvar X86CommonLibcalls = ); defvar Windows32DivRemMulCalls = - LibcallImpls<(add WindowsDivRemMulLibcalls), + LibcallsWithCC<(add WindowsDivRemMulLibcalls), X86_STDCALL, RuntimeLibcallPredicate<"TT.isWindowsMSVCEnvironment() || TT.isWindowsItaniumEnvironment()">>; def X86_32SystemLibrary diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 0322cbe..bfc1175 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -18,9 +18,7 @@ namespace llvm { -class MCAlignFragment; class MCFragment; -class MCLEBFragment; class MCSymbol; class MCAssembler; class MCContext; @@ -60,6 +58,9 @@ protected: // Can only create subclasses. MCAssembler *Asm = nullptr; + bool AllowAutoPadding = false; + bool AllowEnhancedRelaxation = false; + public: MCAsmBackend(const MCAsmBackend &) = delete; MCAsmBackend &operator=(const MCAsmBackend &) = delete; @@ -73,11 +74,11 @@ public: /// Return true if this target might automatically pad instructions and thus /// need to emit padding enable/disable directives around sensative code. - virtual bool allowAutoPadding() const { return false; } + bool allowAutoPadding() const { return AllowAutoPadding; } /// Return true if this target allows an unrelaxable instruction to be /// emitted into RelaxableFragment and then we can increase its size in a /// tricky way for optimization. - virtual bool allowEnhancedRelaxation() const { return false; } + bool allowEnhancedRelaxation() const { return AllowEnhancedRelaxation; } /// lifetime management virtual void reset() {} @@ -105,21 +106,6 @@ public: /// Get information on a fixup kind. virtual MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const; - /// Hook to check if extra nop bytes must be inserted for alignment directive. - /// For some targets this may be necessary in order to support linker - /// relaxation. The number of bytes to insert are returned in Size. - virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, - unsigned &Size) { - return false; - } - - /// Hook which indicates if the target requires a fixup to be generated when - /// handling an align directive in an executable section - virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, - MCAlignFragment &AF) { - return false; - } - // Evaluate a fixup, returning std::nullopt to use default handling for // `Value` and `IsResolved`. Otherwise, returns `IsResolved` with the // expectation that the hook updates `Value`. @@ -177,6 +163,10 @@ public: } // Defined by linker relaxation targets. + + // Return false to use default handling. Otherwise, set `Size` to the number + // of padding bytes. + virtual bool relaxAlign(MCFragment &F, unsigned &Size) { return false; } virtual bool relaxDwarfLineAddr(MCFragment &, bool &WasRelaxed) const { return false; } diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index aa396ef..467ad4e 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -36,8 +36,6 @@ class MCCVDefRangeFragment; class MCCVInlineLineTableFragment; class MCFragment; class MCFixup; -class MCLEBFragment; -class MCPseudoProbeAddrFragment; class MCSymbolRefExpr; class raw_ostream; class MCAsmBackend; @@ -69,6 +67,13 @@ private: SmallVector<const MCSymbol *, 0> Symbols; + struct RelocDirective { + const MCExpr &Offset; + const MCExpr *Expr; + uint32_t Kind; + }; + SmallVector<RelocDirective, 0> relocDirectives; + mutable SmallVector<std::pair<SMLoc, std::string>, 0> PendingErrors; MCDwarfLineTableParams LTParams; @@ -116,7 +121,6 @@ private: bool relaxCVInlineLineTable(MCCVInlineLineTableFragment &DF); bool relaxCVDefRange(MCCVDefRangeFragment &DF); bool relaxFill(MCFillFragment &F); - bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF); public: /// Construct a new assembler instance. @@ -205,6 +209,7 @@ public: LLVM_ABI bool registerSection(MCSection &Section); LLVM_ABI bool registerSymbol(const MCSymbol &Symbol); + void addRelocDirective(RelocDirective RD); LLVM_ABI void reportError(SMLoc L, const Twine &Msg) const; // Record pending errors during layout iteration, as they may go away once the diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h index 3a7ca1a..cae2fbc 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -136,6 +136,18 @@ public: ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &CStream) const = 0; + /// Returns the disassembly of an instruction bundle for VLIW architectures + /// like Hexagon. + /// + /// \param Instr - An MCInst to populate with the contents of + /// the Bundle with sub-instructions encoded as Inst operands. + virtual DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &CStream) const { + return Fail; + } + /// Used to perform separate target specific disassembly for a particular /// symbol. May parse any prelude that precedes instructions after the /// start of a symbol, or the entire symbol. diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h index ad0961c..144f6bc 100644 --- a/llvm/include/llvm/MC/MCELFStreamer.h +++ b/llvm/include/llvm/MC/MCELFStreamer.h @@ -141,7 +141,8 @@ public: } private: - void finalizeCGProfileEntry(const MCSymbolRefExpr *&S, uint64_t Offset); + void finalizeCGProfileEntry(const MCSymbolRefExpr *Sym, uint64_t Offset, + const MCSymbolRefExpr *&S); void finalizeCGProfile(); bool SeenIdent = false; diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index e2a77b8..2ceeba2 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -40,14 +40,6 @@ class MCObjectStreamer : public MCStreamer { std::unique_ptr<MCAssembler> Assembler; bool EmitEHFrame; bool EmitDebugFrame; - struct PendingMCFixup { - const MCSymbol *Sym; - MCFixup Fixup; - MCFragment *DF; - PendingMCFixup(const MCSymbol *McSym, MCFragment *F, MCFixup McFixup) - : Sym(McSym), Fixup(McFixup), DF(F) {} - }; - SmallVector<PendingMCFixup, 2> PendingFixups; struct PendingAssignment { MCSymbol *Symbol; @@ -62,8 +54,6 @@ class MCObjectStreamer : public MCStreamer { void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &); void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; - void emitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI); - void resolvePendingFixups(); protected: MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, @@ -82,24 +72,6 @@ public: MCSymbol *emitCFILabel() override; void emitCFISections(bool EH, bool Debug) override; - void insert(MCFragment *F) { - auto *Sec = CurFrag->getParent(); - F->setParent(Sec); - F->setLayoutOrder(CurFrag->getLayoutOrder() + 1); - CurFrag->Next = F; - CurFrag = F; - Sec->curFragList()->Tail = F; - } - - /// Get a data fragment to write into, creating a new one if the current - /// fragment is not FT_Data. - /// Optionally a \p STI can be passed in so that a new fragment is created - /// if the Subtarget differs from the current fragment. - MCFragment *getOrCreateDataFragment(const MCSubtargetInfo *STI = nullptr); - -protected: - bool changeSectionImpl(MCSection *Section, uint32_t Subsection); - public: void visitUsedSymbol(const MCSymbol &Sym) override; @@ -108,6 +80,15 @@ public: /// \name MCStreamer Interface /// @{ + // Add a fragment with a variable-size tail and start a new empty fragment. + void insert(MCFragment *F); + + // Add a new fragment to the current section without a variable-size tail. + void newFragment(); + + void appendContents(size_t Num, char Elt); + void addFixup(const MCExpr *Value, MCFixupKind Kind); + void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; virtual void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment &F, uint64_t Offset); @@ -162,9 +143,8 @@ public: void emitCVStringTableDirective() override; void emitCVFileChecksumsDirective() override; void emitCVFileChecksumOffsetDirective(unsigned FileNo) override; - std::optional<std::pair<bool, std::string>> - emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, - SMLoc Loc, const MCSubtargetInfo &STI) override; + void emitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc = {}) override; using MCStreamer::emitFill; void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc = SMLoc()) override; diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 296fdd8..87a8349 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -91,8 +91,7 @@ private: bool IsRegistered : 1; bool IsText : 1; - - bool IsVirtual : 1; + bool IsBss : 1; /// Whether the section contains linker-relaxable fragments. If true, the /// offset between two locations may not be fully resolved. @@ -113,7 +112,7 @@ protected: StringRef Name; SectionVariant Variant; - MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual, + MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin); // Protected non-virtual dtor prevents destroy through a base class pointer. ~MCSection() {} @@ -178,9 +177,7 @@ public: /// Check whether this section is "virtual", that is has no actual object /// file contents. - bool isVirtualSection() const { return IsVirtual; } - - virtual StringRef getVirtualSectionKind() const; + bool isBssSection() const { return IsBss; } }; // Represents a contiguous piece of code or data within a section. Its size is @@ -188,6 +185,7 @@ public: // destructors. class MCFragment { friend class MCAssembler; + friend class MCStreamer; friend class MCObjectStreamer; friend class MCSection; @@ -206,7 +204,6 @@ public: FT_SymbolId, FT_CVInlineLines, FT_CVDefRange, - FT_PseudoProbe, }; private: @@ -234,11 +231,16 @@ protected: /// FT_Relaxable, x86-specific bool AllowAutoPadding : 1; + // Track content and fixups for the fixed-size part as fragments are + // appended to the section. The content remains immutable, except when + // modified by applyFixup. uint32_t ContentStart = 0; uint32_t ContentEnd = 0; uint32_t FixupStart = 0; uint32_t FixupEnd = 0; + // Track content and fixups for the optional variable-size tail part, + // typically modified during relaxation. uint32_t VarContentStart = 0; uint32_t VarContentEnd = 0; uint32_t VarFixupStart = 0; @@ -255,6 +257,19 @@ protected: uint32_t OperandSize; } relax; struct { + // The alignment to ensure, in bytes. + Align Alignment; + // The size of the integer (in bytes) of \p Value. + uint8_t FillLen; + // If true, fill with target-specific nop instructions. + bool EmitNops; + // The maximum number of bytes to emit; if the alignment + // cannot be satisfied in this width then this fragment is ignored. + unsigned MaxBytesToEmit; + // Value to use for filling padding bytes. + int64_t Fill; + } align; + struct { // True if this is a sleb128, false if uleb128. bool IsSigned; // The value this fragment should contain. @@ -283,10 +298,10 @@ public: return false; case MCFragment::FT_Relaxable: case MCFragment::FT_Data: + case MCFragment::FT_Align: case MCFragment::FT_Dwarf: case MCFragment::FT_DwarfFrame: case MCFragment::FT_LEB: - case MCFragment::FT_PseudoProbe: case MCFragment::FT_CVInlineLines: case MCFragment::FT_CVDefRange: return true; @@ -328,9 +343,9 @@ public: bool getAllowAutoPadding() const { return AllowAutoPadding; } void setAllowAutoPadding(bool V) { AllowAutoPadding = V; } - // Content-related functions manage parent's storage using ContentStart and + //== Content-related functions manage parent's storage using ContentStart and // ContentSize. - void clearContents() { ContentEnd = ContentStart; } + // Get a SmallVector reference. The caller should call doneAppending to update // `ContentEnd`. SmallVectorImpl<char> &getContentsForAppending() { @@ -354,7 +369,6 @@ public: getContentsForAppending().append(Num, Elt); doneAppending(); } - LLVM_ABI void setContents(ArrayRef<char> Contents); MutableArrayRef<char> getContents() { return MutableArrayRef(getParent()->ContentStorage) .slice(ContentStart, ContentEnd - ContentStart); @@ -386,7 +400,6 @@ public: void clearFixups() { FixupEnd = FixupStart; } LLVM_ABI void addFixup(MCFixup Fixup); LLVM_ABI void appendFixups(ArrayRef<MCFixup> Fixups); - LLVM_ABI void setFixups(ArrayRef<MCFixup> Fixups); MutableArrayRef<MCFixup> getFixups() { return MutableArrayRef(getParent()->FixupStorage) .slice(FixupStart, FixupEnd - FixupStart); @@ -441,7 +454,45 @@ public: llvm::copy(Inst, S.begin() + u.relax.OperandStart); } + //== FT_Align functions + void makeAlign(Align Alignment, int64_t Fill, uint8_t FillLen, + unsigned MaxBytesToEmit) { + Kind = FT_Align; + u.align.EmitNops = false; + u.align.Alignment = Alignment; + u.align.Fill = Fill; + u.align.FillLen = FillLen; + u.align.MaxBytesToEmit = MaxBytesToEmit; + } + + Align getAlignment() const { + assert(Kind == FT_Align); + return u.align.Alignment; + } + int64_t getAlignFill() const { + assert(Kind == FT_Align); + return u.align.Fill; + } + uint8_t getAlignFillLen() const { + assert(Kind == FT_Align); + return u.align.FillLen; + } + unsigned getAlignMaxBytesToEmit() const { + assert(Kind == FT_Align); + return u.align.MaxBytesToEmit; + } + bool hasAlignEmitNops() const { + assert(Kind == FT_Align); + return u.align.EmitNops; + } + //== FT_LEB functions + void makeLEB(bool IsSigned, const MCExpr *Value) { + assert(Kind == FT_Data); + Kind = MCFragment::FT_LEB; + u.leb.IsSigned = IsSigned; + u.leb.Value = Value; + } const MCExpr &getLEBValue() const { assert(Kind == FT_LEB); return *u.leb.Value; @@ -454,10 +505,6 @@ public: assert(Kind == FT_LEB); return u.leb.IsSigned; } - void setLEBSigned(bool S) { - assert(Kind == FT_LEB); - u.leb.IsSigned = S; - } //== FT_DwarfFrame functions const MCExpr &getDwarfAddrDelta() const { @@ -486,52 +533,6 @@ protected: : MCFragment(FType, HasInstructions) {} }; -class MCAlignFragment : public MCFragment { - /// Flag to indicate that (optimal) NOPs should be emitted instead - /// of using the provided value. The exact interpretation of this flag is - /// target dependent. - bool EmitNops : 1; - - /// The alignment to ensure, in bytes. - Align Alignment; - - /// The size of the integer (in bytes) of \p Value. - uint8_t FillLen; - - /// The maximum number of bytes to emit; if the alignment - /// cannot be satisfied in this width then this fragment is ignored. - unsigned MaxBytesToEmit; - - /// Value to use for filling padding bytes. - int64_t Fill; - - /// When emitting Nops some subtargets have specific nop encodings. - const MCSubtargetInfo *STI = nullptr; - -public: - MCAlignFragment(Align Alignment, int64_t Fill, uint8_t FillLen, - unsigned MaxBytesToEmit) - : MCFragment(FT_Align, false), EmitNops(false), Alignment(Alignment), - FillLen(FillLen), MaxBytesToEmit(MaxBytesToEmit), Fill(Fill) {} - - Align getAlignment() const { return Alignment; } - int64_t getFill() const { return Fill; } - uint8_t getFillLen() const { return FillLen; } - unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } - - bool hasEmitNops() const { return EmitNops; } - void setEmitNops(bool Value, const MCSubtargetInfo *STI) { - EmitNops = Value; - this->STI = STI; - } - - const MCSubtargetInfo *getSubtargetInfo() const { return STI; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Align; - } -}; - class MCFillFragment : public MCFragment { uint8_t ValueSize; /// Value to use for filling bytes. @@ -729,22 +730,6 @@ public: } }; -class MCPseudoProbeAddrFragment : public MCEncodedFragment { - /// The expression for the difference of the two symbols that - /// make up the address delta between two .pseudoprobe directives. - const MCExpr *AddrDelta; - -public: - MCPseudoProbeAddrFragment(const MCExpr *AddrDelta) - : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {} - - const MCExpr &getAddrDelta() const { return *AddrDelta; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_PseudoProbe; - } -}; - inline MCSection::iterator &MCSection::iterator::operator++() { F = F->Next; return *this; diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h index 4472a12..f979413a 100644 --- a/llvm/include/llvm/MC/MCSectionCOFF.h +++ b/llvm/include/llvm/MC/MCSectionCOFF.h @@ -82,7 +82,6 @@ public: raw_ostream &OS, uint32_t Subsection) const override; bool useCodeAlign() const override; - StringRef getVirtualSectionKind() const override; unsigned getOrAssignWinCFISectionID(unsigned *NextID) const { if (WinCFISectionID == ~0U) diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h index f09d305..64a4daf 100644 --- a/llvm/include/llvm/MC/MCSectionELF.h +++ b/llvm/include/llvm/MC/MCSectionELF.h @@ -68,10 +68,6 @@ private: Group.getPointer()->setIsSignature(); } - // TODO Delete after we stop supporting generation of GNU-style .zdebug_* - // sections. - void setSectionName(StringRef Name) { this->Name = Name; } - public: /// Decides whether a '.section' directive should be printed before the /// section name @@ -88,7 +84,6 @@ public: raw_ostream &OS, uint32_t Subsection) const override; bool useCodeAlign() const override; - StringRef getVirtualSectionKind() const override; bool isUnique() const { return UniqueID != NonUniqueID; } unsigned getUniqueID() const { return UniqueID; } diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h index 9e3f95e..b166397 100644 --- a/llvm/include/llvm/MC/MCSectionGOFF.h +++ b/llvm/include/llvm/MC/MCSectionGOFF.h @@ -111,7 +111,7 @@ public: // Returns the text style for a section. Only defined for ED and PR sections. GOFF::ESDTextStyle getTextStyle() const { - assert((isED() || isPR() || isVirtualSection()) && "Expect ED or PR section"); + assert((isED() || isPR() || isBssSection()) && "Expect ED or PR section"); if (isED()) return EDAttributes.TextStyle; if (isPR()) diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 1f7c8b5..4bfc8f9 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -259,6 +259,8 @@ class LLVM_ABI MCStreamer { bool AllowAutoPadding = false; protected: + bool IsObj = false; + // Symbol of the current epilog for which we are processing SEH directives. WinEH::FrameInfo::Epilog *CurrentWinEpilog = nullptr; @@ -270,6 +272,8 @@ protected: /// section changes. virtual void changeSection(MCSection *, uint32_t); + void addFragment(MCFragment *F); + virtual void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame); virtual void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame); @@ -308,6 +312,7 @@ public: virtual void reset(); MCContext &getContext() const { return Context; } + bool isObj() const { return IsObj; } // MCObjectStreamer has an MCAssembler and allows more expression folding at // parse time. @@ -425,11 +430,15 @@ public: } MCFragment *getCurrentFragment() const { + // Ensure consistency with the section stack. assert(!getCurrentSection().first || CurFrag->getParent() == getCurrentSection().first); + // Ensure we eagerly allocate an empty fragment after adding fragment with a + // variable-size tail. + assert(!CurFrag || CurFrag->getKind() == MCFragment::FT_Data); return CurFrag; } - + size_t getCurFragOffset() const { return getCurrentFragment()->Offset; } /// Save the current and previous section on the section stack. void pushSection() { SectionStack.push_back( @@ -1048,13 +1057,9 @@ public: virtual void emitSyntaxDirective(); - /// Record a relocation described by the .reloc directive. Return std::nullopt - /// if succeeded. Otherwise, return a pair (Name is invalid, error message). - virtual std::optional<std::pair<bool, std::string>> - emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, - SMLoc Loc, const MCSubtargetInfo &STI) { - return std::nullopt; - } + /// Record a relocation described by the .reloc directive. + virtual void emitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc = {}) {} virtual void emitAddrsig() {} virtual void emitAddrsigSym(const MCSymbol *Sym) {} diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 1036868..a3aa0d9 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1312,7 +1312,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { case ELF::EM_PPC: return (IsLittleEndian ? "elf32-powerpcle" : "elf32-powerpc"); case ELF::EM_RISCV: - return "elf32-littleriscv"; + return (IsLittleEndian ? "elf32-littleriscv" : "elf32-bigriscv"); case ELF::EM_CSKY: return "elf32-csky"; case ELF::EM_SPARC: @@ -1338,7 +1338,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { case ELF::EM_PPC64: return (IsLittleEndian ? "elf64-powerpcle" : "elf64-powerpc"); case ELF::EM_RISCV: - return "elf64-littleriscv"; + return (IsLittleEndian ? "elf64-littleriscv" : "elf64-bigriscv"); case ELF::EM_S390: return "elf64-s390"; case ELF::EM_SPARCV9: @@ -1400,9 +1400,9 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { case ELF::EM_RISCV: switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: - return Triple::riscv32; + return IsLittleEndian ? Triple::riscv32 : Triple::riscv32be; case ELF::ELFCLASS64: - return Triple::riscv64; + return IsLittleEndian ? Triple::riscv64 : Triple::riscv64be; default: report_fatal_error("Invalid ELFCLASS!"); } diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h new file mode 100644 index 0000000..cf4fe20 --- /dev/null +++ b/llvm/include/llvm/Object/SFrameParser.h @@ -0,0 +1,48 @@ +//===- SFrameParser.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_SFRAME_H +#define LLVM_OBJECT_SFRAME_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/SFrame.h" +#include "llvm/Support/Error.h" +#include <cstdint> + +namespace llvm { +namespace object { + +template <endianness E> class SFrameParser { +public: + static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents); + + const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; } + const sframe::Header<E> &getHeader() const { return Header; } + + bool usesFixedRAOffset() const { + return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle; + } + bool usesFixedFPOffset() const { + return false; // Not used in any currently defined ABI. + } + +private: + ArrayRef<uint8_t> Data; + const sframe::Header<E> &Header; + + SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header) + : Data(Data), Header(Header) {} +}; + +extern template class SFrameParser<endianness::big>; +extern template class SFrameParser<endianness::little>; + +} // end namespace object +} // end namespace llvm + +#endif // LLVM_OBJECT_SFRAME_H diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h index 2ecd47d..f3962c3 100644 --- a/llvm/include/llvm/Pass.h +++ b/llvm/include/llvm/Pass.h @@ -114,6 +114,10 @@ public: /// Registration templates, but can be overloaded directly. virtual StringRef getPassName() const; + /// Return a nice clean name for a pass + /// corresponding to that used to enable the pass in opt. + StringRef getPassArgument() const; + /// getPassID - Return the PassID number that corresponds to this pass. AnalysisID getPassID() const { return PassID; diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index a8176eb..b0360f1 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -281,7 +281,7 @@ protected: FunctionPassManager FPM; FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); - FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>()); + FPM.addPass(FreeMachineFunctionPass()); if (this->PB.AddInCGSCCOrder) { MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( createCGSCCToFunctionPassAdaptor(std::move(FPM)))); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 66051d7..fc81ab7 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1132,14 +1132,14 @@ def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg, def udiv_by_const : GICombineRule< (defs root:$root), (match (G_UDIV $dst, $x, $y):$root, - [{ return Helper.matchUDivorURemByConst(*${root}); }]), - (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>; + [{ return Helper.matchUDivOrURemByConst(*${root}); }]), + (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>; def sdiv_by_const : GICombineRule< (defs root:$root), (match (G_SDIV $dst, $x, $y):$root, - [{ return Helper.matchSDivByConst(*${root}); }]), - (apply [{ Helper.applySDivByConst(*${root}); }])>; + [{ return Helper.matchSDivOrSRemByConst(*${root}); }]), + (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>; def sdiv_by_pow2 : GICombineRule< (defs root:$root), @@ -1159,10 +1159,16 @@ def intdiv_combines : GICombineGroup<[udiv_by_pow2, sdiv_by_pow2, def urem_by_const : GICombineRule< (defs root:$root), (match (G_UREM $dst, $x, $y):$root, - [{ return Helper.matchUDivorURemByConst(*${root}); }]), - (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>; + [{ return Helper.matchUDivOrURemByConst(*${root}); }]), + (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>; -def intrem_combines : GICombineGroup<[urem_by_const]>; +def srem_by_const : GICombineRule< + (defs root:$root), + (match (G_SREM $dst, $x, $y):$root, + [{ return Helper.matchSDivOrSRemByConst(*${root}); }]), + (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>; + +def intrem_combines : GICombineGroup<[urem_by_const, srem_by_const]>; def reassoc_ptradd : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 7577792..b65a63b 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -78,6 +78,8 @@ def : GINodeEquiv<G_XOR, xor>; def : GINodeEquiv<G_SHL, shl>; def : GINodeEquiv<G_LSHR, srl>; def : GINodeEquiv<G_ASHR, sra>; +def : GINodeEquiv<G_ABDS, abds>; +def : GINodeEquiv<G_ABDU, abdu>; def : GINodeEquiv<G_SADDSAT, saddsat>; def : GINodeEquiv<G_UADDSAT, uaddsat>; def : GINodeEquiv<G_SSUBSAT, ssubsat>; diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h index be3d41e..40a9b6c 100644 --- a/llvm/include/llvm/TargetParser/Host.h +++ b/llvm/include/llvm/TargetParser/Host.h @@ -53,7 +53,7 @@ LLVM_ABI StringRef getHostCPUName(); /// which features may appear in this map, except that they are all valid LLVM /// feature names. The map can be empty, for example if feature detection /// fails. -LLVM_ABI const StringMap<bool, MallocAllocator> getHostCPUFeatures(); +LLVM_ABI StringMap<bool, MallocAllocator> getHostCPUFeatures(); /// This is a function compatible with cl::AddExtraVersionPrinter, which adds /// info about the current target triple and detected CPU. diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 57d771b..670a632 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -49,62 +49,64 @@ public: enum ArchType { UnknownArch, - arm, // ARM (little endian): arm, armv.*, xscale - armeb, // ARM (big endian): armeb - aarch64, // AArch64 (little endian): aarch64 - aarch64_be, // AArch64 (big endian): aarch64_be - aarch64_32, // AArch64 (little endian) ILP32: aarch64_32 - arc, // ARC: Synopsys ARC - avr, // AVR: Atmel AVR microcontroller - bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) - bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) - csky, // CSKY: csky - dxil, // DXIL 32-bit DirectX bytecode - hexagon, // Hexagon: hexagon - loongarch32, // LoongArch (32-bit): loongarch32 - loongarch64, // LoongArch (64-bit): loongarch64 - m68k, // M68k: Motorola 680x0 family - mips, // MIPS: mips, mipsallegrex, mipsr6 - mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el - mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6 - mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el - msp430, // MSP430: msp430 - ppc, // PPC: powerpc - ppcle, // PPCLE: powerpc (little endian) - ppc64, // PPC64: powerpc64, ppu - ppc64le, // PPC64LE: powerpc64le - r600, // R600: AMD GPUs HD2XXX - HD6XXX - amdgcn, // AMDGCN: AMD GCN GPUs - riscv32, // RISC-V (32-bit): riscv32 - riscv64, // RISC-V (64-bit): riscv64 - sparc, // Sparc: sparc - sparcv9, // Sparcv9: Sparcv9 - sparcel, // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant - systemz, // SystemZ: s390x - tce, // TCE (http://tce.cs.tut.fi/): tce - tcele, // TCE little endian (http://tce.cs.tut.fi/): tcele - thumb, // Thumb (little endian): thumb, thumbv.* - thumbeb, // Thumb (big endian): thumbeb - x86, // X86: i[3-9]86 - x86_64, // X86-64: amd64, x86_64 - xcore, // XCore: xcore - xtensa, // Tensilica: Xtensa - nvptx, // NVPTX: 32-bit - nvptx64, // NVPTX: 64-bit - amdil, // AMDIL - amdil64, // AMDIL with 64-bit pointers - hsail, // AMD HSAIL - hsail64, // AMD HSAIL with 64-bit pointers - spir, // SPIR: standard portable IR for OpenCL 32-bit version - spir64, // SPIR: standard portable IR for OpenCL 64-bit version - spirv, // SPIR-V with logical memory layout. - spirv32, // SPIR-V with 32-bit pointers - spirv64, // SPIR-V with 64-bit pointers - kalimba, // Kalimba: generic kalimba - shave, // SHAVE: Movidius vector VLIW processors - lanai, // Lanai: Lanai 32-bit - wasm32, // WebAssembly with 32-bit pointers - wasm64, // WebAssembly with 64-bit pointers + arm, // ARM (little endian): arm, armv.*, xscale + armeb, // ARM (big endian): armeb + aarch64, // AArch64 (little endian): aarch64 + aarch64_be, // AArch64 (big endian): aarch64_be + aarch64_32, // AArch64 (little endian) ILP32: aarch64_32 + arc, // ARC: Synopsys ARC + avr, // AVR: Atmel AVR microcontroller + bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) + bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) + csky, // CSKY: csky + dxil, // DXIL 32-bit DirectX bytecode + hexagon, // Hexagon: hexagon + loongarch32, // LoongArch (32-bit): loongarch32 + loongarch64, // LoongArch (64-bit): loongarch64 + m68k, // M68k: Motorola 680x0 family + mips, // MIPS: mips, mipsallegrex, mipsr6 + mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el + mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6 + mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el + msp430, // MSP430: msp430 + ppc, // PPC: powerpc + ppcle, // PPCLE: powerpc (little endian) + ppc64, // PPC64: powerpc64, ppu + ppc64le, // PPC64LE: powerpc64le + r600, // R600: AMD GPUs HD2XXX - HD6XXX + amdgcn, // AMDGCN: AMD GCN GPUs + riscv32, // RISC-V (32-bit, little endian): riscv32 + riscv64, // RISC-V (64-bit, little endian): riscv64 + riscv32be, // RISC-V (32-bit, big endian): riscv32be + riscv64be, // RISC-V (64-bit, big endian): riscv64be + sparc, // Sparc: sparc + sparcv9, // Sparcv9: Sparcv9 + sparcel, // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant + systemz, // SystemZ: s390x + tce, // TCE (http://tce.cs.tut.fi/): tce + tcele, // TCE little endian (http://tce.cs.tut.fi/): tcele + thumb, // Thumb (little endian): thumb, thumbv.* + thumbeb, // Thumb (big endian): thumbeb + x86, // X86: i[3-9]86 + x86_64, // X86-64: amd64, x86_64 + xcore, // XCore: xcore + xtensa, // Tensilica: Xtensa + nvptx, // NVPTX: 32-bit + nvptx64, // NVPTX: 64-bit + amdil, // AMDIL + amdil64, // AMDIL with 64-bit pointers + hsail, // AMD HSAIL + hsail64, // AMD HSAIL with 64-bit pointers + spir, // SPIR: standard portable IR for OpenCL 32-bit version + spir64, // SPIR: standard portable IR for OpenCL 64-bit version + spirv, // SPIR-V with logical memory layout. + spirv32, // SPIR-V with 32-bit pointers + spirv64, // SPIR-V with 64-bit pointers + kalimba, // Kalimba: generic kalimba + shave, // SHAVE: Movidius vector VLIW processors + lanai, // Lanai: Lanai 32-bit + wasm32, // WebAssembly with 32-bit pointers + wasm64, // WebAssembly with 64-bit pointers renderscript32, // 32-bit RenderScript renderscript64, // 64-bit RenderScript ve, // NEC SX-Aurora Vector Engine @@ -1064,10 +1066,14 @@ public: } /// Tests whether the target is 32-bit RISC-V. - bool isRISCV32() const { return getArch() == Triple::riscv32; } + bool isRISCV32() const { + return getArch() == Triple::riscv32 || getArch() == Triple::riscv32be; + } /// Tests whether the target is 64-bit RISC-V. - bool isRISCV64() const { return getArch() == Triple::riscv64; } + bool isRISCV64() const { + return getArch() == Triple::riscv64 || getArch() == Triple::riscv64be; + } /// Tests whether the target is RISC-V (32- and 64-bit). bool isRISCV() const { return isRISCV32() || isRISCV64(); } diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index bb79d25..3f5f427 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -325,7 +325,6 @@ LLVM_ABI void salvageDebugInfo(Instruction &I); /// Mark undef if salvaging cannot be completed. LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, - ArrayRef<DbgVariableIntrinsic *> Insns, ArrayRef<DbgVariableRecord *> DPInsns); /// Given an instruction \p I and DIExpression \p DIExpr operating on diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 4e5da81..9500b1f 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -29,7 +29,6 @@ template <typename T> class SSAUpdaterTraits; class Type; class Use; class Value; -class DbgValueInst; /// Helper class for SSA formation on a set of values defined in /// multiple blocks. @@ -123,8 +122,6 @@ public: /// value set to the new SSA value if available, and undef if not. void UpdateDebugValues(Instruction *I); void UpdateDebugValues(Instruction *I, - SmallVectorImpl<DbgValueInst *> &DbgValues); - void UpdateDebugValues(Instruction *I, SmallVectorImpl<DbgVariableRecord *> &DbgValues); /// Rewrite a use like \c RewriteUse but handling in-block definitions. @@ -136,7 +133,6 @@ public: private: Value *GetValueAtEndOfBlockInternal(BasicBlock *BB); - void UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue); void UpdateDebugValue(Instruction *I, DbgVariableRecord *DbgValue); }; diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index a101151..39fef92 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -530,6 +530,7 @@ private: bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + Value *tryToReuseLCSSAPhi(const SCEVAddRecExpr *S); Value *expandAddRecExprLiterally(const SCEVAddRecExpr *); PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const Loop *L, Type *&TruncTy, |