diff options
Diffstat (limited to 'llvm/include')
40 files changed, 513 insertions, 188 deletions
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 73bfe1a..af6e534 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -236,8 +236,8 @@ public: /// In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. - bool shouldRetryWithRuntimeCheck() const { - return FoundNonConstantDistanceDependence && + bool shouldRetryWithRuntimeChecks() const { + return ShouldRetryWithRuntimeChecks && Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks; } @@ -327,9 +327,9 @@ private: uint64_t MaxStoreLoadForwardSafeDistanceInBits = std::numeric_limits<uint64_t>::max(); - /// If we see a non-constant dependence distance we can still try to - /// vectorize this loop with runtime checks. - bool FoundNonConstantDistanceDependence = false; + /// Whether we should try to vectorize the loop with runtime checks, if the + /// dependencies are not safe. + bool ShouldRetryWithRuntimeChecks = false; /// Result of the dependence checks, indicating whether the checked /// dependences are safe for vectorization, require RT checks or are known to diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 98b793a..7928835 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1930,7 +1930,7 @@ public: /// Returns a bitmask constructed from the target-features or fmv-features /// metadata of a function. - LLVM_ABI uint64_t getFeatureMask(const Function &F) const; + LLVM_ABI APInt getFeatureMask(const Function &F) const; /// Returns true if this is an instance of a function with multiple versions. LLVM_ABI bool isMultiversionedFunction(const Function &F) const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index ddc8a5e..2ea87b3 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1126,7 +1126,9 @@ public: virtual bool hasArmWideBranch(bool) const { return false; } - virtual uint64_t getFeatureMask(const Function &F) const { return 0; } + virtual APInt getFeatureMask(const Function &F) const { + return APInt::getZero(32); + } virtual bool isMultiversionedFunction(const Function &F) const { return false; diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index c7e4bdf..a2311d2 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -181,6 +181,7 @@ enum Kind { kw_amdgpu_cs_chain_preserve, kw_amdgpu_kernel, kw_amdgpu_gfx, + kw_amdgpu_gfx_whole_wave, kw_tailcc, kw_m68k_rtdcc, kw_graalcc, diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index e4f82ad..ad35d7f 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -362,6 +362,7 @@ enum { ELFOSABI_FENIXOS = 16, // FenixOS ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture. + ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture. ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime @@ -385,6 +386,12 @@ enum { ELFABIVERSION_AMDGPU_HSA_V6 = 4, }; +// CUDA OS ABI Version identification. +enum { + ELFABIVERSION_CUDA_V1 = 7, + ELFABIVERSION_CUDA_V2 = 8, +}; + #define ELF_RELOC(name, value) name = value, // X86_64 relocations. @@ -921,7 +928,7 @@ enum { // NVPTX specific e_flags. enum : unsigned { - // Processor selection mask for EF_CUDA_SM* values. + // Processor selection mask for EF_CUDA_SM* values prior to blackwell. EF_CUDA_SM = 0xff, // SM based processor values. @@ -954,12 +961,22 @@ enum : unsigned { // The target is using 64-bit addressing. EF_CUDA_64BIT_ADDRESS = 0x400, // Set when using the sm_90a processor. - EF_CUDA_ACCELERATORS = 0x800, + EF_CUDA_ACCELERATORS_V1 = 0x800, // Undocumented software feature. EF_CUDA_SW_FLAG_V2 = 0x1000, // Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values. EF_CUDA_VIRTUAL_SM = 0xff0000, + + // Processor selection mask for EF_CUDA_SM* values following blackwell. + EF_CUDA_SM_MASK = 0xff00, + + // SM based processor values. + EF_CUDA_SM100 = 0x6400, + EF_CUDA_SM120 = 0x7800, + + // Set when using an accelerator variant like sm_100a. + EF_CUDA_ACCELERATORS = 0x8, }; // ELF Relocation types for BPF diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h index 16d3b16..98dbe38 100644 --- a/llvm/include/llvm/BinaryFormat/SFrame.h +++ b/llvm/include/llvm/BinaryFormat/SFrame.h @@ -15,33 +15,36 @@ #ifndef LLVM_BINARYFORMAT_SFRAME_H #define LLVM_BINARYFORMAT_SFRAME_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Endian.h" -namespace llvm::sframe { +namespace llvm { + +template <typename T> struct EnumEntry; + +namespace sframe { LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); constexpr uint16_t Magic = 0xdee2; enum class Version : uint8_t { - V1 = 1, - V2 = 2, +#define HANDLE_SFRAME_VERSION(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; enum class Flags : uint8_t { - FDESorted = 0x01, - FramePointer = 0x02, - FDEFuncStartPCRel = 0x04, +#define HANDLE_SFRAME_FLAG(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" V2AllFlags = FDESorted | FramePointer | FDEFuncStartPCRel, LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xff), }; enum class ABI : uint8_t { - AArch64EndianBig = 1, - AArch64EndianLittle = 2, - AMD64EndianLittle = 3, +#define HANDLE_SFRAME_ABI(CODE, NAME) NAME = CODE, +#include "llvm/BinaryFormat/SFrameConstants.def" }; /// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info. @@ -160,6 +163,11 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>; template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>; template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>; -} // namespace llvm::sframe +ArrayRef<EnumEntry<Version>> getVersions(); +ArrayRef<EnumEntry<Flags>> getFlags(); +ArrayRef<EnumEntry<ABI>> getABIs(); + +} // namespace sframe +} // namespace llvm #endif // LLVM_BINARYFORMAT_SFRAME_H diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def new file mode 100644 index 0000000..643b15f --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def @@ -0,0 +1,39 @@ +//===- SFrameConstants.def --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \ + defined(HANDLE_SFRAME_ABI)) +#error "Missing HANDLE_SFRAME definition" +#endif + +#ifndef HANDLE_SFRAME_VERSION +#define HANDLE_SFRAME_VERSION(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_FLAG +#define HANDLE_SFRAME_FLAG(CODE, NAME) +#endif + +#ifndef HANDLE_SFRAME_ABI +#define HANDLE_SFRAME_ABI(CODE, NAME) +#endif + +HANDLE_SFRAME_VERSION(0x01, V1) +HANDLE_SFRAME_VERSION(0x02, V2) + +HANDLE_SFRAME_FLAG(0x01, FDESorted) +HANDLE_SFRAME_FLAG(0x02, FramePointer) +HANDLE_SFRAME_FLAG(0x04, FDEFuncStartPCRel) + +HANDLE_SFRAME_ABI(0x01, AArch64EndianBig) +HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle) +HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle) + +#undef HANDLE_SFRAME_VERSION +#undef HANDLE_SFRAME_FLAG +#undef HANDLE_SFRAME_ABI diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h index da73238..490d1a3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h @@ -103,6 +103,20 @@ public: /// \return The known alignment for the pointer-like value \p R. Align computeKnownAlignment(Register R, unsigned Depth = 0); + /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts + /// that are all less than the element bit-width of the shift node, return the + /// valid constant range. + std::optional<ConstantRange> + getValidShiftAmountRange(Register R, const APInt &DemandedElts, + unsigned Depth); + + /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts + /// that are all less than the element bit-width of the shift node, return the + /// minimum possible value. + std::optional<uint64_t> getValidMinimumShiftAmount(Register R, + const APInt &DemandedElts, + unsigned Depth = 0); + /// Determine which floating-point classes are valid for \p V, and return them /// in KnownFPClass bit sets. /// diff --git a/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/llvm/include/llvm/CodeGen/MachineInstrBundle.h index d324236..ebf7534 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBundle.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBundle.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_MACHINEINSTRBUNDLE_H #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/Support/Compiler.h" namespace llvm { @@ -294,6 +295,12 @@ LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI); +class FinalizeBundleTestPass : public PassInfoMixin<FinalizeBundleTestPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + } // End llvm namespace #endif diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index e7a7091..efda7eb 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -65,7 +65,7 @@ // // void <SubTarget>Subtarget:: // overrideSchedPolicy(MachineSchedPolicy &Policy, -// unsigned NumRegionInstrs) const { +// const SchedRegion &Region) const { // Policy.<Flag> = true; // } // @@ -218,6 +218,22 @@ struct MachineSchedPolicy { MachineSchedPolicy() = default; }; +/// A region of an MBB for scheduling. +struct SchedRegion { + /// RegionBegin is the first instruction in the scheduling region, and + /// RegionEnd is either MBB->end() or the scheduling boundary after the + /// last instruction in the scheduling region. These iterators cannot refer + /// to instructions outside of the identified scheduling region because + /// those may be reordered before scheduling this region. + MachineBasicBlock::iterator RegionBegin; + MachineBasicBlock::iterator RegionEnd; + unsigned NumRegionInstrs; + + SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E, + unsigned N) + : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {} +}; + /// MachineSchedStrategy - Interface to the scheduling algorithm used by /// ScheduleDAGMI. /// diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 714285e..095a40e 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -438,10 +438,6 @@ LLVM_ABI extern char &UnpackMachineBundlesID; LLVM_ABI FunctionPass * createUnpackMachineBundles(std::function<bool(const MachineFunction &)> Ftor); -/// FinalizeMachineBundles - This pass finalize machine instruction -/// bundles (created earlier, e.g. during pre-RA scheduling). -LLVM_ABI extern char &FinalizeMachineBundlesID; - /// StackMapLiveness - This pass analyses the register live-out set of /// stackmap/patchpoint intrinsics and attaches the calculated information to /// the intrinsic for later emission to the StackMap. diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 657951d..eac8e14 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1202,13 +1202,16 @@ public: LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags Flags); LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, - ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); + ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops, + const SDNodeFlags Flags); LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags); // Use flags from current flag inserter. LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops); + LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, + ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops); LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -1346,9 +1349,10 @@ public: /// Helper function to make it easier to build SelectCC's if you just have an /// ISD::CondCode instead of an SDValue. SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, - SDValue False, ISD::CondCode Cond) { + SDValue False, ISD::CondCode Cond, + SDNodeFlags Flags = SDNodeFlags()) { return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True, - False, getCondCode(Cond)); + False, getCondCode(Cond), Flags); } /// Try to simplify a select/vselect into 1 of its operands or a constant. @@ -1425,10 +1429,9 @@ public: /// Creates a LifetimeSDNode that starts (`IsStart==true`) or ends /// (`IsStart==false`) the lifetime of the portion of `FrameIndex` between - /// offsets `Offset` and `Offset + Size`. + /// offsets `0` and `Size`. LLVM_ABI SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain, - int FrameIndex, int64_t Size, - int64_t Offset = -1); + int FrameIndex, int64_t Size); /// Creates a PseudoProbeSDNode with function GUID `Guid` and /// the index of the block `Index` it is probing, as well as the attributes diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 5d9937f..8e9c1f7 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -2004,25 +2004,17 @@ public: class LifetimeSDNode : public SDNode { friend class SelectionDAG; int64_t Size; - int64_t Offset; // -1 if offset is unknown. LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, - SDVTList VTs, int64_t Size, int64_t Offset) - : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {} + SDVTList VTs, int64_t Size) + : SDNode(Opcode, Order, dl, VTs), Size(Size) {} + public: int64_t getFrameIndex() const { return cast<FrameIndexSDNode>(getOperand(1))->getIndex(); } - bool hasOffset() const { return Offset >= 0; } - int64_t getOffset() const { - assert(hasOffset() && "offset is unknown"); - return Offset; - } - int64_t getSize() const { - assert(hasOffset() && "offset is unknown"); - return Size; - } + int64_t getSize() const { return Size; } // Methods to support isa and dyn_cast static bool classof(const SDNode *N) { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 1a548a5..cbdc1b6 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3219,25 +3219,19 @@ public: /// Lower an interleaved store to target specific intrinsics. Return /// true on success. /// - /// \p SI is the vector store instruction. + /// \p SI is the vector store instruction. Can be either a plain store + /// or a vp.store. + /// \p Mask is a per-segment (i.e. number of lanes equal to that of one + /// component being interwoven) mask. Can be nullptr, in which case the + /// result is unconditional. /// \p SVI is the shufflevector to RE-interleave the stored vector. /// \p Factor is the interleave factor. - virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + virtual bool lowerInterleavedStore(Instruction *Store, Value *Mask, + ShuffleVectorInst *SVI, unsigned Factor) const { return false; } - /// Lower an interleaved store to target specific intrinsics. Return - /// true on success. - /// - /// \p Store is the vp.store instruction. - /// \p Mask is a mask value - /// \p InterleaveOps is a list of values being interleaved. - virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask, - ArrayRef<Value *> InterleaveOps) const { - return false; - } - /// Lower a deinterleave intrinsic to a target specific load intrinsic. /// Return true on success. Currently only supports /// llvm.vector.deinterleave{2,3,5,7} diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 45e67d8..a8c7a8a 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -54,6 +54,7 @@ class TargetRegisterClass; class TargetRegisterInfo; class TargetSchedModel; class Triple; +struct SchedRegion; //===----------------------------------------------------------------------===// /// @@ -231,7 +232,7 @@ public: /// scheduling heuristics (no custom MachineSchedStrategy) to make /// changes to the generic scheduling policy. virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const {} + const SchedRegion &Region) const {} /// Override generic post-ra scheduling policy within a region. /// @@ -241,7 +242,7 @@ public: /// Note that some options like tracking register pressure won't take effect /// in post-ra scheduling. virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const {} + const SchedRegion &Region) const {} // Perform target-specific adjustments to the latency of a schedule // dependency. diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index de888ff..7919f7a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -779,16 +779,17 @@ struct LinkT { template <typename T, typename I, typename E> // struct MapT { using LocatorList = ObjectListT<I, E>; - ENUM(MapType, To, From, Tofrom, Alloc, Release, Delete); - ENUM(MapTypeModifier, Always, Close, Present, OmpxHold); + ENUM(MapType, To, From, Tofrom, Storage); + ENUM(MapTypeModifier, Always, Close, Delete, Present, Self, OmpxHold); + ENUM(RefModifier, RefPtee, RefPtr, RefPtrPtee); // See note at the definition of the MapperT type. using Mappers = ListT<type::MapperT<I, E>>; // Not a spec name using Iterator = type::IteratorT<T, I, E>; using MapTypeModifiers = ListT<MapTypeModifier>; // Not a spec name using TupleTrait = std::true_type; - std::tuple<OPT(MapType), OPT(MapTypeModifiers), OPT(Mappers), OPT(Iterator), - LocatorList> + std::tuple<OPT(MapType), OPT(MapTypeModifiers), OPT(RefModifier), + OPT(Mappers), OPT(Iterator), LocatorList> t; }; diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 611bfe3..047baa3 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -708,6 +708,7 @@ bool ConstructDecompositionT<C, H>::applyClause( tomp::clause::MapT<TypeTy, IdTy, ExprTy>{ {/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt, + /*RefModifier=*/std::nullopt, /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, /*LocatorList=*/std::move(tofrom)}}); dirTarget->clauses.push_back(map); @@ -969,8 +970,8 @@ bool ConstructDecompositionT<C, H>::applyClause( llvm::omp::Clause::OMPC_map, tomp::clause::MapT<TypeTy, IdTy, ExprTy>{ {/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt, - /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, - /*LocatorList=*/std::move(tofrom)}}); + /*RefModifier=*/std::nullopt, /*Mapper=*/std::nullopt, + /*Iterator=*/std::nullopt, /*LocatorList=*/std::move(tofrom)}}); dirTarget->clauses.push_back(map); applied = true; diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index d68491e..ef761eb 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -284,6 +284,9 @@ namespace CallingConv { RISCV_VLSCall_32768 = 122, RISCV_VLSCall_65536 = 123, + // Calling convention for AMDGPU whole wave functions. + AMDGPU_Gfx_WholeWave = 124, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; @@ -294,8 +297,13 @@ namespace CallingConv { /// directly or indirectly via a call-like instruction. constexpr bool isCallableCC(CallingConv::ID CC) { switch (CC) { + // Called with special intrinsics: + // llvm.amdgcn.cs.chain case CallingConv::AMDGPU_CS_Chain: case CallingConv::AMDGPU_CS_ChainPreserve: + // llvm.amdgcn.call.whole.wave + case CallingConv::AMDGPU_Gfx_WholeWave: + // Hardware entry points: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_GS: diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index f8241a3..c529a86 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -39,30 +39,26 @@ class DbgVariableRecord; class Instruction; class Module; -/// Finds dbg.declare intrinsics declaring local variables as living in the +/// Finds dbg.declare records declaring local variables as living in the /// memory that 'V' points to. -LLVM_ABI TinyPtrVector<DbgDeclareInst *> findDbgDeclares(Value *V); -/// As above, for DVRDeclares. LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRDeclares(Value *V); /// As above, for DVRValues. LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRValues(Value *V); -/// Finds the llvm.dbg.value intrinsics describing a value. -LLVM_ABI void findDbgValues( - SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V, - SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr); - -/// Finds the debug info intrinsics describing a value. -LLVM_ABI void findDbgUsers( - SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V, - SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr); +/// Finds the debug info records describing a value. +LLVM_ABI void +findDbgUsers(Value *V, + SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords); +/// Finds the dbg.values describing a value. +LLVM_ABI void +findDbgValues(Value *V, + SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords); /// Find subprogram that is enclosing this scope. LLVM_ABI DISubprogram *getDISubprogram(const MDNode *Scope); /// Produce a DebugLoc to use for each dbg.declare that is promoted to a /// dbg.value. -LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII); LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableRecord *DVR); /// Strip debug info in the module if it exists. @@ -192,13 +188,6 @@ using AssignmentInstRange = /// Iterators invalidated by adding or removing DIAssignID metadata to/from any /// instruction (including by deleting or cloning instructions). LLVM_ABI AssignmentInstRange getAssignmentInsts(DIAssignID *ID); -/// Return a range of instructions (typically just one) that perform the -/// assignment that \p DAI encodes. -/// Iterators invalidated by adding or removing DIAssignID metadata to/from any -/// instruction (including by deleting or cloning instructions). -inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) { - return getAssignmentInsts(DAI->getAssignID()); -} inline AssignmentInstRange getAssignmentInsts(const DbgVariableRecord *DVR) { assert(DVR->isDbgAssign() && diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index ecda6c4..8bfa345 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3717,6 +3717,20 @@ class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> : IntrWillReturn, IntrNoCallback, IntrNoFree] >; +class AMDGPUWmmaIntrinsicModsC_MatrixFMT : + Intrinsic< + [llvm_anyfloat_ty], // %D + [ + llvm_i32_ty, // matrix_a_fmt + llvm_anyint_ty, // %A + llvm_i32_ty, // matrix_b_fmt + llvm_anyint_ty, // %B + llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs) + LLVMMatchType<0>, // %C + ], + [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree] +>; + defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = { def int_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>; def int_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>; @@ -3741,6 +3755,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>; def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>; def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT; def int_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>; } diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 5ddc144..967d166 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -331,6 +331,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> { !eq(gf,"m8n16:x2") : !listsplat(llvm_i32_ty, 2), !eq(gf,"m8n16:x4") : !listsplat(llvm_i32_ty, 4), + // stmatrix b8 -> s32 @ m16n8 + !eq(gf,"m16n8:x1") : !listsplat(llvm_i32_ty, 1), + !eq(gf,"m16n8:x2") : !listsplat(llvm_i32_ty, 2), + !eq(gf,"m16n8:x4") : !listsplat(llvm_i32_ty, 4), + ); } @@ -403,6 +408,17 @@ class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> { !subst("llvm.", "int_", intr)); } +class STMATRIX_NAME<WMMA_REGS Frag, int Trans> { + string intr = "llvm.nvvm.stmatrix.sync.aligned" + # "." # Frag.geom + # "." # Frag.frag + # !if(Trans, ".trans", "") + # "." # Frag.ptx_elt_type + ; + string record = !subst(".", "_", + !subst("llvm.", "int_", intr)); +} + // Generates list of 4-tuples of WMMA_REGS representing a valid MMA op. // Geom: list of supported geometries. // TypeN: PTX type of the corresponding fragment's element. @@ -443,6 +459,16 @@ class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> { list<string> ops = !foreach(x, ret, x.gft); } +class STMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> { + list<WMMA_REGS> ret = + !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1, + !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2, + !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3, + [WMMA_REGS<geom, frag, type>])))))); + // Debugging aid for readable representation of the list above. + list<string> ops = !foreach(x, ret, x.gft); +} + // Creates list of valid combinations of fragments. This is the main list that // drives generation of corresponding intrinsics and instructions. class NVVM_MMA_OPS { @@ -537,9 +563,18 @@ class NVVM_MMA_OPS { list<WMMA_REGS> ldmatrix_geom_m8n16_ops = LDMATRIX_OPS< ["m8n16"], ["x1", "x2", "x4"], ["b8x16.b6x16_p32", "b8x16.b4x16_p64"]>.ret; + list<WMMA_REGS> stmatrix_b16_ops = STMATRIX_OPS< + ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret; + + list<WMMA_REGS> stmatrix_b8_ops = STMATRIX_OPS< + ["m16n8"], ["x1", "x2", "x4"], ["b8"]>.ret; + list<WMMA_REGS> all_ldmatrix_ops = !listconcat(ldmatrix_b16_ops, ldmatrix_geom_m16n16_ops, ldmatrix_geom_m8n16_ops); + + list<WMMA_REGS> all_stmatrix_ops = !listconcat(stmatrix_b16_ops, + stmatrix_b8_ops); } def NVVM_MMA_OPS : NVVM_MMA_OPS; @@ -680,6 +715,19 @@ class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> { ); } +// Returns true if the fragment is valid for stmatrix ops is supported; +// false otherwise. +class NVVM_STMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> { + string g = frag.geom; + string t = frag.ptx_elt_type; + + bit ret = !cond( + !and(!eq(g, "m8n8"), !eq(t, "b16")): true, + !and(!eq(g, "m16n8"), !eq(t, "b8"), !eq(trans, 1)): true, + true: false + ); +} + class SHFL_INFO<bit sync, string mode, string type, bit return_pred> { string Suffix = !if(sync, "sync_", "") # mode # "_" @@ -1969,6 +2017,23 @@ foreach transposed = [0, 1] in { } } +// STMATRIX +class NVVM_STMATRIX<WMMA_REGS Frag, int Transposed> + : Intrinsic<[], + !listconcat([llvm_anyptr_ty], Frag.regs), + [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, + WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>], + STMATRIX_NAME<Frag, Transposed>.intr>; + +foreach transposed = [0, 1] in { + foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in { + if NVVM_STMATRIX_SUPPORTED<frag, transposed>.ret then { + def STMATRIX_NAME<frag, transposed>.record + : NVVM_STMATRIX<frag, transposed>; + } + } +} + // MAPA let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in { def int_nvvm_mapa diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index f592ff2..c1e4b97 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -43,6 +43,10 @@ def int_wasm_ref_is_null_exn : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_exnref_ty], [IntrNoMem], "llvm.wasm.ref.is_null.exn">; +def int_wasm_ref_test_func + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_vararg_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Table intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index 737610b..0fd5de3 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic"); - return false; } inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) { @@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) { } llvm_unreachable( "Checking invalid f2i/d2i intrinsic for signed int conversion"); - return false; } inline APFloat::roundingMode @@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) { return APFloat::rmTowardZero; } llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic"); - return APFloat::roundingMode::Invalid; } inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) { @@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic"); - return false; } inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) { @@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic"); - return false; } inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) { @@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) { return false; } llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic"); - return false; +} + +inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_ceil_ftz_f: + case Intrinsic::nvvm_fabs_ftz: + case Intrinsic::nvvm_floor_ftz_f: + case Intrinsic::nvvm_round_ftz_f: + case Intrinsic::nvvm_saturate_ftz_f: + case Intrinsic::nvvm_sqrt_rn_ftz_f: + return true; + case Intrinsic::nvvm_ceil_f: + case Intrinsic::nvvm_ceil_d: + case Intrinsic::nvvm_fabs: + case Intrinsic::nvvm_floor_f: + case Intrinsic::nvvm_floor_d: + case Intrinsic::nvvm_round_f: + case Intrinsic::nvvm_round_d: + case Intrinsic::nvvm_saturate_d: + case Intrinsic::nvvm_saturate_f: + case Intrinsic::nvvm_sqrt_f: + case Intrinsic::nvvm_sqrt_rn_d: + case Intrinsic::nvvm_sqrt_rn_f: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid unary intrinsic"); +} + +inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_rcp_rm_ftz_f: + case Intrinsic::nvvm_rcp_rn_ftz_f: + case Intrinsic::nvvm_rcp_rp_ftz_f: + case Intrinsic::nvvm_rcp_rz_ftz_f: + return true; + case Intrinsic::nvvm_rcp_rm_d: + case Intrinsic::nvvm_rcp_rm_f: + case Intrinsic::nvvm_rcp_rn_d: + case Intrinsic::nvvm_rcp_rn_f: + case Intrinsic::nvvm_rcp_rp_d: + case Intrinsic::nvvm_rcp_rp_f: + case Intrinsic::nvvm_rcp_rz_d: + case Intrinsic::nvvm_rcp_rz_f: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic"); +} + +inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_rcp_rm_f: + case Intrinsic::nvvm_rcp_rm_d: + case Intrinsic::nvvm_rcp_rm_ftz_f: + return APFloat::rmTowardNegative; + + case Intrinsic::nvvm_rcp_rn_f: + case Intrinsic::nvvm_rcp_rn_d: + case Intrinsic::nvvm_rcp_rn_ftz_f: + return APFloat::rmNearestTiesToEven; + + case Intrinsic::nvvm_rcp_rp_f: + case Intrinsic::nvvm_rcp_rp_d: + case Intrinsic::nvvm_rcp_rp_ftz_f: + return APFloat::rmTowardPositive; + + case Intrinsic::nvvm_rcp_rz_f: + case Intrinsic::nvvm_rcp_rz_d: + case Intrinsic::nvvm_rcp_rz_ftz_f: + return APFloat::rmTowardZero; + } + llvm_unreachable("Checking rounding mode for invalid rcp intrinsic"); +} + +inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) { + if (ShouldFTZ) + return DenormalMode::getPreserveSign(); + return DenormalMode::getIEEE(); } } // namespace nvvm diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index 0315715..33eda5a 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -164,7 +164,7 @@ public: /// Add a class name to pass name mapping for use by pass instrumentation. LLVM_ABI void addClassToPassName(StringRef ClassName, StringRef PassName); - /// Get the pass name for a given pass class name. + /// Get the pass name for a given pass class name. Empty if no match found. LLVM_ABI StringRef getPassNameForClassName(StringRef ClassName); private: diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 50e50a9..27c5d5c 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -822,12 +822,52 @@ template <typename Class> struct bind_ty { } }; +/// Check whether the value has the given Class and matches the nested +/// pattern. Capture it into the provided variable if successful. +template <typename Class, typename MatchTy> struct bind_and_match_ty { + Class *&VR; + MatchTy Match; + + bind_and_match_ty(Class *&V, const MatchTy &Match) : VR(V), Match(Match) {} + + template <typename ITy> bool match(ITy *V) const { + auto *CV = dyn_cast<Class>(V); + if (CV && Match.match(V)) { + VR = CV; + return true; + } + return false; + } +}; + /// Match a value, capturing it if we match. inline bind_ty<Value> m_Value(Value *&V) { return V; } inline bind_ty<const Value> m_Value(const Value *&V) { return V; } +/// Match against the nested pattern, and capture the value if we match. +template <typename MatchTy> +inline bind_and_match_ty<Value, MatchTy> m_Value(Value *&V, + const MatchTy &Match) { + return {V, Match}; +} + +/// Match against the nested pattern, and capture the value if we match. +template <typename MatchTy> +inline bind_and_match_ty<const Value, MatchTy> m_Value(const Value *&V, + const MatchTy &Match) { + return {V, Match}; +} + /// Match an instruction, capturing it if we match. inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } + +/// Match against the nested pattern, and capture the instruction if we match. +template <typename MatchTy> +inline bind_and_match_ty<Instruction, MatchTy> +m_Instruction(Instruction *&I, const MatchTy &Match) { + return {I, Match}; +} + /// Match a unary operator, capturing it if we match. inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; } /// Match a binary operator, capturing it if we match. diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 2e231cf..31801da 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -119,7 +119,6 @@ LLVM_ABI void initializeExpandVariadicsPass(PassRegistry &); LLVM_ABI void initializeExternalAAWrapperPassPass(PassRegistry &); LLVM_ABI void initializeFEntryInserterLegacyPass(PassRegistry &); LLVM_ABI void initializeFinalizeISelPass(PassRegistry &); -LLVM_ABI void initializeFinalizeMachineBundlesPass(PassRegistry &); LLVM_ABI void initializeFixIrreduciblePass(PassRegistry &); LLVM_ABI void initializeFixupStatepointCallerSavedLegacyPass(PassRegistry &); LLVM_ABI void initializeFlattenCFGLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 0322cbe..bfc1175 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -18,9 +18,7 @@ namespace llvm { -class MCAlignFragment; class MCFragment; -class MCLEBFragment; class MCSymbol; class MCAssembler; class MCContext; @@ -60,6 +58,9 @@ protected: // Can only create subclasses. MCAssembler *Asm = nullptr; + bool AllowAutoPadding = false; + bool AllowEnhancedRelaxation = false; + public: MCAsmBackend(const MCAsmBackend &) = delete; MCAsmBackend &operator=(const MCAsmBackend &) = delete; @@ -73,11 +74,11 @@ public: /// Return true if this target might automatically pad instructions and thus /// need to emit padding enable/disable directives around sensative code. - virtual bool allowAutoPadding() const { return false; } + bool allowAutoPadding() const { return AllowAutoPadding; } /// Return true if this target allows an unrelaxable instruction to be /// emitted into RelaxableFragment and then we can increase its size in a /// tricky way for optimization. - virtual bool allowEnhancedRelaxation() const { return false; } + bool allowEnhancedRelaxation() const { return AllowEnhancedRelaxation; } /// lifetime management virtual void reset() {} @@ -105,21 +106,6 @@ public: /// Get information on a fixup kind. virtual MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const; - /// Hook to check if extra nop bytes must be inserted for alignment directive. - /// For some targets this may be necessary in order to support linker - /// relaxation. The number of bytes to insert are returned in Size. - virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, - unsigned &Size) { - return false; - } - - /// Hook which indicates if the target requires a fixup to be generated when - /// handling an align directive in an executable section - virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, - MCAlignFragment &AF) { - return false; - } - // Evaluate a fixup, returning std::nullopt to use default handling for // `Value` and `IsResolved`. Otherwise, returns `IsResolved` with the // expectation that the hook updates `Value`. @@ -177,6 +163,10 @@ public: } // Defined by linker relaxation targets. + + // Return false to use default handling. Otherwise, set `Size` to the number + // of padding bytes. + virtual bool relaxAlign(MCFragment &F, unsigned &Size) { return false; } virtual bool relaxDwarfLineAddr(MCFragment &, bool &WasRelaxed) const { return false; } diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index 319e131..2ceeba2 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -54,7 +54,6 @@ class MCObjectStreamer : public MCStreamer { void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &); void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; - void emitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI); protected: MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, @@ -73,13 +72,6 @@ public: MCSymbol *emitCFILabel() override; void emitCFISections(bool EH, bool Debug) override; - /// Get a data fragment to write into, creating a new one if the current - /// fragment is not FT_Data. - MCFragment *getOrCreateDataFragment(); - -protected: - bool changeSectionImpl(MCSection *Section, uint32_t Subsection); - public: void visitUsedSymbol(const MCSymbol &Sym) override; @@ -88,6 +80,15 @@ public: /// \name MCStreamer Interface /// @{ + // Add a fragment with a variable-size tail and start a new empty fragment. + void insert(MCFragment *F); + + // Add a new fragment to the current section without a variable-size tail. + void newFragment(); + + void appendContents(size_t Num, char Elt); + void addFixup(const MCExpr *Value, MCFixupKind Kind); + void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; virtual void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment &F, uint64_t Offset); diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 66ea8f8..87a8349 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -91,8 +91,7 @@ private: bool IsRegistered : 1; bool IsText : 1; - - bool IsVirtual : 1; + bool IsBss : 1; /// Whether the section contains linker-relaxable fragments. If true, the /// offset between two locations may not be fully resolved. @@ -113,7 +112,7 @@ protected: StringRef Name; SectionVariant Variant; - MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual, + MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin); // Protected non-virtual dtor prevents destroy through a base class pointer. ~MCSection() {} @@ -178,9 +177,7 @@ public: /// Check whether this section is "virtual", that is has no actual object /// file contents. - bool isVirtualSection() const { return IsVirtual; } - - virtual StringRef getVirtualSectionKind() const; + bool isBssSection() const { return IsBss; } }; // Represents a contiguous piece of code or data within a section. Its size is @@ -234,11 +231,16 @@ protected: /// FT_Relaxable, x86-specific bool AllowAutoPadding : 1; + // Track content and fixups for the fixed-size part as fragments are + // appended to the section. The content remains immutable, except when + // modified by applyFixup. uint32_t ContentStart = 0; uint32_t ContentEnd = 0; uint32_t FixupStart = 0; uint32_t FixupEnd = 0; + // Track content and fixups for the optional variable-size tail part, + // typically modified during relaxation. uint32_t VarContentStart = 0; uint32_t VarContentEnd = 0; uint32_t VarFixupStart = 0; @@ -255,6 +257,19 @@ protected: uint32_t OperandSize; } relax; struct { + // The alignment to ensure, in bytes. + Align Alignment; + // The size of the integer (in bytes) of \p Value. + uint8_t FillLen; + // If true, fill with target-specific nop instructions. + bool EmitNops; + // The maximum number of bytes to emit; if the alignment + // cannot be satisfied in this width then this fragment is ignored. + unsigned MaxBytesToEmit; + // Value to use for filling padding bytes. + int64_t Fill; + } align; + struct { // True if this is a sleb128, false if uleb128. bool IsSigned; // The value this fragment should contain. @@ -283,6 +298,7 @@ public: return false; case MCFragment::FT_Relaxable: case MCFragment::FT_Data: + case MCFragment::FT_Align: case MCFragment::FT_Dwarf: case MCFragment::FT_DwarfFrame: case MCFragment::FT_LEB: @@ -327,9 +343,9 @@ public: bool getAllowAutoPadding() const { return AllowAutoPadding; } void setAllowAutoPadding(bool V) { AllowAutoPadding = V; } - // Content-related functions manage parent's storage using ContentStart and + //== Content-related functions manage parent's storage using ContentStart and // ContentSize. - void clearContents() { ContentEnd = ContentStart; } + // Get a SmallVector reference. The caller should call doneAppending to update // `ContentEnd`. SmallVectorImpl<char> &getContentsForAppending() { @@ -353,7 +369,6 @@ public: getContentsForAppending().append(Num, Elt); doneAppending(); } - LLVM_ABI void setContents(ArrayRef<char> Contents); MutableArrayRef<char> getContents() { return MutableArrayRef(getParent()->ContentStorage) .slice(ContentStart, ContentEnd - ContentStart); @@ -385,7 +400,6 @@ public: void clearFixups() { FixupEnd = FixupStart; } LLVM_ABI void addFixup(MCFixup Fixup); LLVM_ABI void appendFixups(ArrayRef<MCFixup> Fixups); - LLVM_ABI void setFixups(ArrayRef<MCFixup> Fixups); MutableArrayRef<MCFixup> getFixups() { return MutableArrayRef(getParent()->FixupStorage) .slice(FixupStart, FixupEnd - FixupStart); @@ -440,6 +454,38 @@ public: llvm::copy(Inst, S.begin() + u.relax.OperandStart); } + //== FT_Align functions + void makeAlign(Align Alignment, int64_t Fill, uint8_t FillLen, + unsigned MaxBytesToEmit) { + Kind = FT_Align; + u.align.EmitNops = false; + u.align.Alignment = Alignment; + u.align.Fill = Fill; + u.align.FillLen = FillLen; + u.align.MaxBytesToEmit = MaxBytesToEmit; + } + + Align getAlignment() const { + assert(Kind == FT_Align); + return u.align.Alignment; + } + int64_t getAlignFill() const { + assert(Kind == FT_Align); + return u.align.Fill; + } + uint8_t getAlignFillLen() const { + assert(Kind == FT_Align); + return u.align.FillLen; + } + unsigned getAlignMaxBytesToEmit() const { + assert(Kind == FT_Align); + return u.align.MaxBytesToEmit; + } + bool hasAlignEmitNops() const { + assert(Kind == FT_Align); + return u.align.EmitNops; + } + //== FT_LEB functions void makeLEB(bool IsSigned, const MCExpr *Value) { assert(Kind == FT_Data); @@ -487,52 +533,6 @@ protected: : MCFragment(FType, HasInstructions) {} }; -class MCAlignFragment : public MCFragment { - /// Flag to indicate that (optimal) NOPs should be emitted instead - /// of using the provided value. The exact interpretation of this flag is - /// target dependent. - bool EmitNops : 1; - - /// The alignment to ensure, in bytes. - Align Alignment; - - /// The size of the integer (in bytes) of \p Value. - uint8_t FillLen; - - /// The maximum number of bytes to emit; if the alignment - /// cannot be satisfied in this width then this fragment is ignored. - unsigned MaxBytesToEmit; - - /// Value to use for filling padding bytes. - int64_t Fill; - - /// When emitting Nops some subtargets have specific nop encodings. - const MCSubtargetInfo *STI = nullptr; - -public: - MCAlignFragment(Align Alignment, int64_t Fill, uint8_t FillLen, - unsigned MaxBytesToEmit) - : MCFragment(FT_Align, false), EmitNops(false), Alignment(Alignment), - FillLen(FillLen), MaxBytesToEmit(MaxBytesToEmit), Fill(Fill) {} - - Align getAlignment() const { return Alignment; } - int64_t getFill() const { return Fill; } - uint8_t getFillLen() const { return FillLen; } - unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } - - bool hasEmitNops() const { return EmitNops; } - void setEmitNops(bool Value, const MCSubtargetInfo *STI) { - EmitNops = Value; - this->STI = STI; - } - - const MCSubtargetInfo *getSubtargetInfo() const { return STI; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Align; - } -}; - class MCFillFragment : public MCFragment { uint8_t ValueSize; /// Value to use for filling bytes. diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h index 4472a12..f979413a 100644 --- a/llvm/include/llvm/MC/MCSectionCOFF.h +++ b/llvm/include/llvm/MC/MCSectionCOFF.h @@ -82,7 +82,6 @@ public: raw_ostream &OS, uint32_t Subsection) const override; bool useCodeAlign() const override; - StringRef getVirtualSectionKind() const override; unsigned getOrAssignWinCFISectionID(unsigned *NextID) const { if (WinCFISectionID == ~0U) diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h index f09d305..64a4daf 100644 --- a/llvm/include/llvm/MC/MCSectionELF.h +++ b/llvm/include/llvm/MC/MCSectionELF.h @@ -68,10 +68,6 @@ private: Group.getPointer()->setIsSignature(); } - // TODO Delete after we stop supporting generation of GNU-style .zdebug_* - // sections. - void setSectionName(StringRef Name) { this->Name = Name; } - public: /// Decides whether a '.section' directive should be printed before the /// section name @@ -88,7 +84,6 @@ public: raw_ostream &OS, uint32_t Subsection) const override; bool useCodeAlign() const override; - StringRef getVirtualSectionKind() const override; bool isUnique() const { return UniqueID != NonUniqueID; } unsigned getUniqueID() const { return UniqueID; } diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h index 9e3f95e..b166397 100644 --- a/llvm/include/llvm/MC/MCSectionGOFF.h +++ b/llvm/include/llvm/MC/MCSectionGOFF.h @@ -111,7 +111,7 @@ public: // Returns the text style for a section. Only defined for ED and PR sections. GOFF::ESDTextStyle getTextStyle() const { - assert((isED() || isPR() || isVirtualSection()) && "Expect ED or PR section"); + assert((isED() || isPR() || isBssSection()) && "Expect ED or PR section"); if (isED()) return EDAttributes.TextStyle; if (isPR()) diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 4b91dbc..4bfc8f9 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -259,6 +259,8 @@ class LLVM_ABI MCStreamer { bool AllowAutoPadding = false; protected: + bool IsObj = false; + // Symbol of the current epilog for which we are processing SEH directives. WinEH::FrameInfo::Epilog *CurrentWinEpilog = nullptr; @@ -270,6 +272,8 @@ protected: /// section changes. virtual void changeSection(MCSection *, uint32_t); + void addFragment(MCFragment *F); + virtual void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame); virtual void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame); @@ -308,6 +312,7 @@ public: virtual void reset(); MCContext &getContext() const { return Context; } + bool isObj() const { return IsObj; } // MCObjectStreamer has an MCAssembler and allows more expression folding at // parse time. @@ -425,10 +430,15 @@ public: } MCFragment *getCurrentFragment() const { + // Ensure consistency with the section stack. assert(!getCurrentSection().first || CurFrag->getParent() == getCurrentSection().first); + // Ensure we eagerly allocate an empty fragment after adding fragment with a + // variable-size tail. + assert(!CurFrag || CurFrag->getKind() == MCFragment::FT_Data); return CurFrag; } + size_t getCurFragOffset() const { return getCurrentFragment()->Offset; } /// Save the current and previous section on the section stack. void pushSection() { SectionStack.push_back( @@ -456,9 +466,6 @@ public: MCSymbol *endSection(MCSection *Section); - void insert(MCFragment *F); - void newFragment(); - /// Returns the mnemonic for \p MI, if the streamer has access to a /// instruction printer and returns an empty string otherwise. virtual StringRef getMnemonic(const MCInst &MI) const { return ""; } diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index a3aa0d9..ced1afd 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const { case ELF::ELFOSABI_OPENBSD: return Triple::OpenBSD; case ELF::ELFOSABI_CUDA: + case ELF::ELFOSABI_CUDA_V2: return Triple::CUDA; case ELF::ELFOSABI_AMDGPU_HSA: return Triple::AMDHSA; diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h new file mode 100644 index 0000000..cf4fe20 --- /dev/null +++ b/llvm/include/llvm/Object/SFrameParser.h @@ -0,0 +1,48 @@ +//===- SFrameParser.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_SFRAME_H +#define LLVM_OBJECT_SFRAME_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/SFrame.h" +#include "llvm/Support/Error.h" +#include <cstdint> + +namespace llvm { +namespace object { + +template <endianness E> class SFrameParser { +public: + static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents); + + const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; } + const sframe::Header<E> &getHeader() const { return Header; } + + bool usesFixedRAOffset() const { + return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle; + } + bool usesFixedFPOffset() const { + return false; // Not used in any currently defined ABI. + } + +private: + ArrayRef<uint8_t> Data; + const sframe::Header<E> &Header; + + SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header) + : Data(Data), Header(Header) {} +}; + +extern template class SFrameParser<endianness::big>; +extern template class SFrameParser<endianness::little>; + +} // end namespace object +} // end namespace llvm + +#endif // LLVM_OBJECT_SFRAME_H diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 732fdc7..bee2106 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) +MACHINE_FUNCTION_PASS("finalizebundle-test", FinalizeBundleTestPass()) MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass()) MACHINE_FUNCTION_PASS("init-undef", InitUndefPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 7577792..b65a63b 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -78,6 +78,8 @@ def : GINodeEquiv<G_XOR, xor>; def : GINodeEquiv<G_SHL, shl>; def : GINodeEquiv<G_LSHR, srl>; def : GINodeEquiv<G_ASHR, sra>; +def : GINodeEquiv<G_ABDS, abds>; +def : GINodeEquiv<G_ABDU, abdu>; def : GINodeEquiv<G_SADDSAT, saddsat>; def : GINodeEquiv<G_UADDSAT, uaddsat>; def : GINodeEquiv<G_SSUBSAT, ssubsat>; diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 59e8117..8e83b046 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -276,14 +276,14 @@ LLVM_ABI bool isX18ReservedByDefault(const Triple &TT); // For a given set of feature names, which can be either target-features, or // fmv-features metadata, expand their dependencies and then return a bitmask // corresponding to the entries of AArch64::FeatPriorities. -LLVM_ABI uint64_t getFMVPriority(ArrayRef<StringRef> Features); +LLVM_ABI APInt getFMVPriority(ArrayRef<StringRef> Features); // For a given set of FMV feature names, expand their dependencies and then // return a bitmask corresponding to the entries of AArch64::CPUFeatures. // The values in CPUFeatures are not bitmasks themselves, they are sequential // (0, 1, 2, 3, ...). The resulting bitmask is used at runtime to test whether // a certain FMV feature is available on the host. -LLVM_ABI uint64_t getCpuSupportsMask(ArrayRef<StringRef> Features); +LLVM_ABI APInt getCpuSupportsMask(ArrayRef<StringRef> Features); LLVM_ABI void PrintSupportedExtensions(); diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index bb79d25..3f5f427 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -325,7 +325,6 @@ LLVM_ABI void salvageDebugInfo(Instruction &I); /// Mark undef if salvaging cannot be completed. LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, - ArrayRef<DbgVariableIntrinsic *> Insns, ArrayRef<DbgVariableRecord *> DPInsns); /// Given an instruction \p I and DIExpression \p DIExpr operating on diff --git a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h index f288bdf..e0cdcf8 100644 --- a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h +++ b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h @@ -57,7 +57,6 @@ struct AllocaInfo { struct StackInfo { MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument; - SmallVector<Instruction *, 4> UnrecognizedLifetimes; SmallVector<Instruction *, 8> RetVec; bool CallsReturnTwice = false; }; |