Diffstat (limited to 'llvm/include/llvm')
-rw-r--r--  llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/MemoryProfileInfo.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 4
-rw-r--r--  llvm/include/llvm/AsmParser/LLToken.h | 1
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h | 21
-rw-r--r--  llvm/include/llvm/BinaryFormat/SFrame.h | 28
-rw-r--r--  llvm/include/llvm/BinaryFormat/SFrameConstants.def | 39
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h | 14
-rw-r--r--  llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/MachineInstrBundle.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/MachineScheduler.h | 18
-rw-r--r--  llvm/include/llvm/CodeGen/Passes.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAG.h | 15
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 16
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 20
-rw-r--r--  llvm/include/llvm/CodeGen/TargetSubtargetInfo.h | 5
-rw-r--r--  llvm/include/llvm/ExecutionEngine/MCJIT.h | 14
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 9
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h | 5
-rw-r--r--  llvm/include/llvm/IR/CallingConv.h | 8
-rw-r--r--  llvm/include/llvm/IR/DebugInfo.h | 29
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 15
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsNVVM.td | 143
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 4
-rw-r--r--  llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 83
-rw-r--r--  llvm/include/llvm/IR/PassInstrumentation.h | 2
-rw-r--r--  llvm/include/llvm/IR/PatternMatch.h | 40
-rw-r--r--  llvm/include/llvm/InitializePasses.h | 1
-rw-r--r--  llvm/include/llvm/LinkAllIR.h | 8
-rw-r--r--  llvm/include/llvm/LinkAllPasses.h | 15
-rw-r--r--  llvm/include/llvm/MC/MCAsmBackend.h | 28
-rw-r--r--  llvm/include/llvm/MC/MCAssembler.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCObjectStreamer.h | 17
-rw-r--r--  llvm/include/llvm/MC/MCSection.h | 140
-rw-r--r--  llvm/include/llvm/MC/MCSectionCOFF.h | 1
-rw-r--r--  llvm/include/llvm/MC/MCSectionELF.h | 5
-rw-r--r--  llvm/include/llvm/MC/MCSectionGOFF.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCStreamer.h | 13
-rw-r--r--  llvm/include/llvm/Object/ELFObjectFile.h | 1
-rw-r--r--  llvm/include/llvm/Object/SFrameParser.h | 48
-rw-r--r--  llvm/include/llvm/Passes/MachinePassRegistry.def | 1
-rw-r--r--  llvm/include/llvm/Support/AlwaysTrue.h | 25
-rw-r--r--  llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 2
-rw-r--r--  llvm/include/llvm/TargetParser/AArch64TargetParser.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Local.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h | 1
48 files changed, 645 insertions(+), 257 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 73bfe1a..af6e534 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -236,8 +236,8 @@ public:
/// In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() const {
- return FoundNonConstantDistanceDependence &&
+ bool shouldRetryWithRuntimeChecks() const {
+ return ShouldRetryWithRuntimeChecks &&
Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
}
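A hedged usage sketch of the renamed predicate, assuming a MemoryDepChecker-style object named `DepChecker` at the call site:

```cpp
// Sketch only: fall back to runtime pointer checks when the dependence
// analysis could not prove safety but opted into retrying.
if (!DepChecker.isSafeForVectorization() &&
    DepChecker.shouldRetryWithRuntimeChecks()) {
  // Re-run the legality analysis, this time emitting runtime checks.
}
```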
@@ -327,9 +327,9 @@ private:
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();
- /// If we see a non-constant dependence distance we can still try to
- /// vectorize this loop with runtime checks.
- bool FoundNonConstantDistanceDependence = false;
+ /// Whether we should try to vectorize the loop with runtime checks, if the
+ /// dependencies are not safe.
+ bool ShouldRetryWithRuntimeChecks = false;
/// Result of the dependence checks, indicating whether the checked
/// dependences are safe for vectorization, require RT checks or are known to
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index b042a71..571caf9 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -102,6 +102,12 @@ private:
// The maximum size of a cold allocation context, from the profile summary.
uint64_t MaxColdSize;
+ // Tracks whether we have built the Trie from existing MD_memprof metadata. We
+ // apply different heuristics for determining whether to discard non-cold
+ // contexts when rebuilding as we have lost information available during the
+ // original profile match.
+ bool BuiltFromExistingMetadata = false;
+
void deleteTrieNode(CallStackTrieNode *Node) {
if (!Node)
return;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 98b793a..7928835 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1930,7 +1930,7 @@ public:
/// Returns a bitmask constructed from the target-features or fmv-features
/// metadata of a function.
- LLVM_ABI uint64_t getFeatureMask(const Function &F) const;
+ LLVM_ABI APInt getFeatureMask(const Function &F) const;
/// Returns true if this is an instance of a function with multiple versions.
LLVM_ABI bool isMultiversionedFunction(const Function &F) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ddc8a5e..2ea87b3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1126,7 +1126,9 @@ public:
virtual bool hasArmWideBranch(bool) const { return false; }
- virtual uint64_t getFeatureMask(const Function &F) const { return 0; }
+ virtual APInt getFeatureMask(const Function &F) const {
+ return APInt::getZero(32);
+ }
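Widening the return type from `uint64_t` to `APInt` lets targets express more than 64 FMV features. A hedged sketch of a target override under the new signature; the target name, 128-bit width, and bit position are illustrative:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Function.h"

// Illustrative override; MyTTIImpl is a hypothetical target implementation.
llvm::APInt MyTTIImpl::getFeatureMask(const llvm::Function &F) const {
  llvm::APInt Mask = llvm::APInt::getZero(128);
  if (F.hasFnAttribute("fmv-features"))
    Mask.setBit(70); // a feature bit beyond the old 64-bit limit
  return Mask;
}
```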
virtual bool isMultiversionedFunction(const Function &F) const {
return false;
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index c7e4bdf..a2311d2 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -181,6 +181,7 @@ enum Kind {
kw_amdgpu_cs_chain_preserve,
kw_amdgpu_kernel,
kw_amdgpu_gfx,
+ kw_amdgpu_gfx_whole_wave,
kw_tailcc,
kw_m68k_rtdcc,
kw_graalcc,
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index e4f82ad..ad35d7f 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -362,6 +362,7 @@ enum {
ELFOSABI_FENIXOS = 16, // FenixOS
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
+ ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
+// CUDA OS ABI Version identification.
+enum {
+ ELFABIVERSION_CUDA_V1 = 7,
+ ELFABIVERSION_CUDA_V2 = 8,
+};
+
#define ELF_RELOC(name, value) name = value,
// X86_64 relocations.
@@ -921,7 +928,7 @@ enum {
// NVPTX specific e_flags.
enum : unsigned {
- // Processor selection mask for EF_CUDA_SM* values.
+ // Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
EF_CUDA_SM = 0xff,
// SM based processor values.
@@ -954,12 +961,22 @@ enum : unsigned {
// The target is using 64-bit addressing.
EF_CUDA_64BIT_ADDRESS = 0x400,
// Set when using the sm_90a processor.
- EF_CUDA_ACCELERATORS = 0x800,
+ EF_CUDA_ACCELERATORS_V1 = 0x800,
// Undocumented software feature.
EF_CUDA_SW_FLAG_V2 = 0x1000,
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,
+
+ // Processor selection mask for EF_CUDA_SM* values following Blackwell.
+ EF_CUDA_SM_MASK = 0xff00,
+
+ // SM based processor values.
+ EF_CUDA_SM100 = 0x6400,
+ EF_CUDA_SM120 = 0x7800,
+
+ // Set when using an accelerator variant like sm_100a.
+ EF_CUDA_ACCELERATORS = 0x8,
};
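A hedged sketch of decoding the SM value under both layouts; the shift width follows from the mask definitions above:

```cpp
#include "llvm/BinaryFormat/ELF.h"

// Sketch: recover the numeric SM from e_flags. Post-Blackwell objects
// store it in bits 8-15 (EF_CUDA_SM_MASK); older ones in bits 0-7.
unsigned decodeCudaSM(unsigned EFlags) {
  if (unsigned SM = (EFlags & llvm::ELF::EF_CUDA_SM_MASK) >> 8)
    return SM; // e.g. EF_CUDA_SM100 (0x6400) decodes to 100
  return EFlags & llvm::ELF::EF_CUDA_SM;
}
```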
// ELF Relocation types for BPF
diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h
index 16d3b16..98dbe38 100644
--- a/llvm/include/llvm/BinaryFormat/SFrame.h
+++ b/llvm/include/llvm/BinaryFormat/SFrame.h
@@ -15,33 +15,36 @@
#ifndef LLVM_BINARYFORMAT_SFRAME_H
#define LLVM_BINARYFORMAT_SFRAME_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Endian.h"
-namespace llvm::sframe {
+namespace llvm {
+
+template <typename T> struct EnumEntry;
+
+namespace sframe {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
constexpr uint16_t Magic = 0xdee2;
enum class Version : uint8_t {
- V1 = 1,
- V2 = 2,
+#define HANDLE_SFRAME_VERSION(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
};
enum class Flags : uint8_t {
- FDESorted = 0x01,
- FramePointer = 0x02,
- FDEFuncStartPCRel = 0x04,
+#define HANDLE_SFRAME_FLAG(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
V2AllFlags = FDESorted | FramePointer | FDEFuncStartPCRel,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xff),
};
enum class ABI : uint8_t {
- AArch64EndianBig = 1,
- AArch64EndianLittle = 2,
- AMD64EndianLittle = 3,
+#define HANDLE_SFRAME_ABI(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
};
/// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info.
@@ -160,6 +163,11 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>;
template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>;
template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>;
-} // namespace llvm::sframe
+ArrayRef<EnumEntry<Version>> getVersions();
+ArrayRef<EnumEntry<Flags>> getFlags();
+ArrayRef<EnumEntry<ABI>> getABIs();
+
+} // namespace sframe
+} // namespace llvm
#endif // LLVM_BINARYFORMAT_SFRAME_H
diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
new file mode 100644
index 0000000..643b15f
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
@@ -0,0 +1,39 @@
+//===- SFrameConstants.def --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \
+ defined(HANDLE_SFRAME_ABI))
+#error "Missing HANDLE_SFRAME definition"
+#endif
+
+#ifndef HANDLE_SFRAME_VERSION
+#define HANDLE_SFRAME_VERSION(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_FLAG
+#define HANDLE_SFRAME_FLAG(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_ABI
+#define HANDLE_SFRAME_ABI(CODE, NAME)
+#endif
+
+HANDLE_SFRAME_VERSION(0x01, V1)
+HANDLE_SFRAME_VERSION(0x02, V2)
+
+HANDLE_SFRAME_FLAG(0x01, FDESorted)
+HANDLE_SFRAME_FLAG(0x02, FramePointer)
+HANDLE_SFRAME_FLAG(0x04, FDEFuncStartPCRel)
+
+HANDLE_SFRAME_ABI(0x01, AArch64EndianBig)
+HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle)
+HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle)
+
+#undef HANDLE_SFRAME_VERSION
+#undef HANDLE_SFRAME_FLAG
+#undef HANDLE_SFRAME_ABI
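The .def follows the usual X-macro pattern: a consumer defines only the HANDLE_* macros it needs before including the file, and the rest default to no-ops. A minimal sketch of building one of the EnumEntry tables declared in SFrame.h (the exact table contents are an assumption):

```cpp
#include "llvm/Support/ScopedPrinter.h" // llvm::EnumEntry

// Expand only the ABI entries into a name/value table.
static const llvm::EnumEntry<llvm::sframe::ABI> ABIEntries[] = {
#define HANDLE_SFRAME_ABI(CODE, NAME) {#NAME, llvm::sframe::ABI::NAME},
#include "llvm/BinaryFormat/SFrameConstants.def"
};
```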
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index da73238..490d1a3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -103,6 +103,20 @@ public:
/// \return The known alignment for the pointer-like value \p R.
Align computeKnownAlignment(Register R, unsigned Depth = 0);
+ /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+ /// that are all less than the element bit-width of the shift node, return the
+ /// valid constant range.
+ std::optional<ConstantRange>
+ getValidShiftAmountRange(Register R, const APInt &DemandedElts,
+ unsigned Depth);
+
+ /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+ /// that are all less than the element bit-width of the shift node, return the
+ /// minimum possible value.
+ std::optional<uint64_t> getValidMinimumShiftAmount(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
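A hedged usage sketch of the new query from a combine; the surrounding names are illustrative:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include <optional>

// Sketch: only fold when every demanded lane's shift amount is in range.
void combineShl(llvm::GISelValueTracking &VT, llvm::Register ShAmt,
                unsigned NumElts) {
  llvm::APInt DemandedElts = llvm::APInt::getAllOnes(NumElts);
  if (std::optional<uint64_t> Min =
          VT.getValidMinimumShiftAmount(ShAmt, DemandedElts)) {
    // Every shift amount is < bit-width and >= *Min, so a G_SHL here is
    // guaranteed to clear at least *Min low bits.
  }
}
```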
/// Determine which floating-point classes are valid for \p V, and return them
/// in KnownFPClass bit sets.
///
diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index c22f9d4..c70413d 100644
--- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -15,19 +15,17 @@
#define LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
#include "llvm/IR/BuiltinGCs.h"
-#include <cstdlib>
+#include "llvm/Support/AlwaysTrue.h"
namespace {
struct ForceAsmWriterLinking {
ForceAsmWriterLinking() {
// We must reference the plug-ins in such a way that compilers will not
// delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char*) -1)
+ // yet is effectively a NO-OP. This is so that globals in the translation
+ // units where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
llvm::linkOcamlGCPrinter();
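The new header centralizes the trick. A plausible shape for getNonFoldableAlwaysTrue, as a sketch only (the real implementation in AlwaysTrue.h may differ), is an always-true read the optimizer cannot fold:

```cpp
// Hedged sketch of llvm/Support/AlwaysTrue.h: returns true, but through a
// volatile read that is opaque to the optimizer, so the references guarded
// by it above cannot be discarded as dead code.
namespace llvm {
inline bool getNonFoldableAlwaysTrue() {
  static volatile bool True = true;
  return True;
}
} // namespace llvm
```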
diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 6f56682..f0a01d2 100644
--- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -16,20 +16,18 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Target/TargetMachine.h"
-#include <cstdlib>
namespace {
struct ForceCodegenLinking {
ForceCodegenLinking() {
// We must reference the passes in such a way that compilers will not
// delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char*) -1)
+ // yet is effectively a NO-OP. This is so that globals in the translation
+ // units where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
(void) llvm::createFastRegisterAllocator();
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index d324236..ebf7534 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_MACHINEINSTRBUNDLE_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -294,6 +295,12 @@ LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI,
Register Reg,
const TargetRegisterInfo *TRI);
+class FinalizeBundleTestPass : public PassInfoMixin<FinalizeBundleTestPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index e7a7091..efda7eb 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -65,7 +65,7 @@
//
// void <SubTarget>Subtarget::
// overrideSchedPolicy(MachineSchedPolicy &Policy,
-// unsigned NumRegionInstrs) const {
+// const SchedRegion &Region) const {
// Policy.<Flag> = true;
// }
//
@@ -218,6 +218,22 @@ struct MachineSchedPolicy {
MachineSchedPolicy() = default;
};
+/// A region of an MBB for scheduling.
+struct SchedRegion {
+ /// RegionBegin is the first instruction in the scheduling region, and
+ /// RegionEnd is either MBB->end() or the scheduling boundary after the
+ /// last instruction in the scheduling region. These iterators cannot refer
+ /// to instructions outside of the identified scheduling region because
+ /// those may be reordered before scheduling this region.
+ MachineBasicBlock::iterator RegionBegin;
+ MachineBasicBlock::iterator RegionEnd;
+ unsigned NumRegionInstrs;
+
+ SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
+ unsigned N)
+ : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
+};
+
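With the whole region available, policy overrides can key off more than the instruction count. A hedged sketch of a subtarget override under the new signature; the heuristic and target name are illustrative:

```cpp
// Sketch: disable pressure tracking for tiny regions.
void MySubtarget::overrideSchedPolicy(llvm::MachineSchedPolicy &Policy,
                                      const llvm::SchedRegion &Region) const {
  if (Region.NumRegionInstrs < 16)
    Policy.ShouldTrackPressure = false;
}
```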
/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 714285e..095a40e 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -438,10 +438,6 @@ LLVM_ABI extern char &UnpackMachineBundlesID;
LLVM_ABI FunctionPass *
createUnpackMachineBundles(std::function<bool(const MachineFunction &)> Ftor);
-/// FinalizeMachineBundles - This pass finalize machine instruction
-/// bundles (created earlier, e.g. during pre-RA scheduling).
-LLVM_ABI extern char &FinalizeMachineBundlesID;
-
/// StackMapLiveness - This pass analyses the register live-out set of
/// stackmap/patchpoint intrinsics and attaches the calculated information to
/// the intrinsic for later emission to the StackMap.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 657951d..eac8e14 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1202,13 +1202,16 @@ public:
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL,
- ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops);
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops,
+ const SDNodeFlags Flags);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
// Use flags from current flag inserter.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops);
+ LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL,
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -1346,9 +1349,10 @@ public:
/// Helper function to make it easier to build SelectCC's if you just have an
/// ISD::CondCode instead of an SDValue.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True,
- SDValue False, ISD::CondCode Cond) {
+ SDValue False, ISD::CondCode Cond,
+ SDNodeFlags Flags = SDNodeFlags()) {
return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True,
- False, getCondCode(Cond));
+ False, getCondCode(Cond), Flags);
}
/// Try to simplify a select/vselect into 1 of its operands or a constant.
@@ -1425,10 +1429,9 @@ public:
/// Creates a LifetimeSDNode that starts (`IsStart==true`) or ends
/// (`IsStart==false`) the lifetime of the portion of `FrameIndex` between
- /// offsets `Offset` and `Offset + Size`.
+ /// offsets `0` and `Size`.
LLVM_ABI SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
- int FrameIndex, int64_t Size,
- int64_t Offset = -1);
+ int FrameIndex, int64_t Size);
/// Creates a PseudoProbeSDNode with function GUID `Guid` and
/// the index of the block `Index` it is probing, as well as the attributes
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 5d9937f..8e9c1f7 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -2004,25 +2004,17 @@ public:
class LifetimeSDNode : public SDNode {
friend class SelectionDAG;
int64_t Size;
- int64_t Offset; // -1 if offset is unknown.
LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
- SDVTList VTs, int64_t Size, int64_t Offset)
- : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
+ SDVTList VTs, int64_t Size)
+ : SDNode(Opcode, Order, dl, VTs), Size(Size) {}
+
public:
int64_t getFrameIndex() const {
return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
}
- bool hasOffset() const { return Offset >= 0; }
- int64_t getOffset() const {
- assert(hasOffset() && "offset is unknown");
- return Offset;
- }
- int64_t getSize() const {
- assert(hasOffset() && "offset is unknown");
- return Size;
- }
+ int64_t getSize() const { return Size; }
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 1a548a5..cbdc1b6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3219,25 +3219,19 @@ public:
/// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
- /// \p SI is the vector store instruction.
+ /// \p SI is the vector store instruction. Can be either a plain store
+ /// or a vp.store.
+ /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+ /// component being interwoven) mask. Can be nullptr, in which case the
+ /// result is unconditional.
/// \p SVI is the shufflevector to RE-interleave the stored vector.
/// \p Factor is the interleave factor.
- virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ virtual bool lowerInterleavedStore(Instruction *Store, Value *Mask,
+ ShuffleVectorInst *SVI,
unsigned Factor) const {
return false;
}
- /// Lower an interleaved store to target specific intrinsics. Return
- /// true on success.
- ///
- /// \p Store is the vp.store instruction.
- /// \p Mask is a mask value
- /// \p InterleaveOps is a list of values being interleaved.
- virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOps) const {
- return false;
- }
-
/// Lower a deinterleave intrinsic to a target specific load intrinsic.
/// Return true on success. Currently only supports
/// llvm.vector.deinterleave{2,3,5,7}
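A hedged sketch of a target override under the unified signature, which now covers both plain stores and vp.stores through the optional mask; the lowering body is elided:

```cpp
// Sketch only: MyTargetLowering is a hypothetical target.
bool MyTargetLowering::lowerInterleavedStore(llvm::Instruction *Store,
                                             llvm::Value *Mask,
                                             llvm::ShuffleVectorInst *SVI,
                                             unsigned Factor) const {
  if (Factor != 2)
    return false;
  if (Mask) {
    // Masked (vp.store) path: Mask has one lane per interleaved segment.
  }
  // ... emit the target's interleaved-store intrinsic here ...
  return true;
}
```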
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 45e67d8..a8c7a8a 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -54,6 +54,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetSchedModel;
class Triple;
+struct SchedRegion;
//===----------------------------------------------------------------------===//
///
@@ -231,7 +232,7 @@ public:
/// scheduling heuristics (no custom MachineSchedStrategy) to make
/// changes to the generic scheduling policy.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {}
+ const SchedRegion &Region) const {}
/// Override generic post-ra scheduling policy within a region.
///
@@ -241,7 +242,7 @@ public:
/// Note that some options like tracking register pressure won't take effect
/// in post-ra scheduling.
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {}
+ const SchedRegion &Region) const {}
// Perform target-specific adjustments to the latency of a schedule
// dependency.
diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h
index c836c06..1e035c0 100644
--- a/llvm/include/llvm/ExecutionEngine/MCJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h
@@ -15,8 +15,8 @@
#define LLVM_EXECUTIONENGINE_MCJIT_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Support/Compiler.h"
-#include <cstdlib>
extern "C" LLVM_ABI void LLVMLinkInMCJIT();
@@ -24,13 +24,11 @@ namespace {
struct ForceMCJITLinking {
ForceMCJITLinking() {
// We must reference MCJIT in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char*) -1)
+ // delete it all as dead code, even with whole program optimization, yet
+ // is effectively a NO-OP. This is so that globals in the translation
+ // units where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
LLVMLinkInMCJIT();
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index de888ff..7919f7a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -779,16 +779,17 @@ struct LinkT {
template <typename T, typename I, typename E> //
struct MapT {
using LocatorList = ObjectListT<I, E>;
- ENUM(MapType, To, From, Tofrom, Alloc, Release, Delete);
- ENUM(MapTypeModifier, Always, Close, Present, OmpxHold);
+ ENUM(MapType, To, From, Tofrom, Storage);
+ ENUM(MapTypeModifier, Always, Close, Delete, Present, Self, OmpxHold);
+ ENUM(RefModifier, RefPtee, RefPtr, RefPtrPtee);
// See note at the definition of the MapperT type.
using Mappers = ListT<type::MapperT<I, E>>; // Not a spec name
using Iterator = type::IteratorT<T, I, E>;
using MapTypeModifiers = ListT<MapTypeModifier>; // Not a spec name
using TupleTrait = std::true_type;
- std::tuple<OPT(MapType), OPT(MapTypeModifiers), OPT(Mappers), OPT(Iterator),
- LocatorList>
+ std::tuple<OPT(MapType), OPT(MapTypeModifiers), OPT(RefModifier),
+ OPT(Mappers), OPT(Iterator), LocatorList>
t;
};
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index 611bfe3..047baa3 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -708,6 +708,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
{/*MapType=*/MapType::Tofrom,
/*MapTypeModifier=*/std::nullopt,
+ /*RefModifier=*/std::nullopt,
/*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
/*LocatorList=*/std::move(tofrom)}});
dirTarget->clauses.push_back(map);
@@ -969,8 +970,8 @@ bool ConstructDecompositionT<C, H>::applyClause(
llvm::omp::Clause::OMPC_map,
tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
{/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt,
- /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
- /*LocatorList=*/std::move(tofrom)}});
+ /*RefModifier=*/std::nullopt, /*Mapper=*/std::nullopt,
+ /*Iterator=*/std::nullopt, /*LocatorList=*/std::move(tofrom)}});
dirTarget->clauses.push_back(map);
applied = true;
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index d68491e..ef761eb 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -284,6 +284,9 @@ namespace CallingConv {
RISCV_VLSCall_32768 = 122,
RISCV_VLSCall_65536 = 123,
+ // Calling convention for AMDGPU whole wave functions.
+ AMDGPU_Gfx_WholeWave = 124,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
@@ -294,8 +297,13 @@ namespace CallingConv {
/// directly or indirectly via a call-like instruction.
constexpr bool isCallableCC(CallingConv::ID CC) {
switch (CC) {
+ // Called with special intrinsics:
+ // llvm.amdgcn.cs.chain
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
+ // llvm.amdgcn.call.whole.wave
+ case CallingConv::AMDGPU_Gfx_WholeWave:
+ // Hardware entry points:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_GS:
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index f8241a3..c529a86 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -39,30 +39,26 @@ class DbgVariableRecord;
class Instruction;
class Module;
-/// Finds dbg.declare intrinsics declaring local variables as living in the
+/// Finds dbg.declare records declaring local variables as living in the
/// memory that 'V' points to.
-LLVM_ABI TinyPtrVector<DbgDeclareInst *> findDbgDeclares(Value *V);
-/// As above, for DVRDeclares.
LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRDeclares(Value *V);
/// As above, for DVRValues.
LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRValues(Value *V);
-/// Finds the llvm.dbg.value intrinsics describing a value.
-LLVM_ABI void findDbgValues(
- SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V,
- SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
-
-/// Finds the debug info intrinsics describing a value.
-LLVM_ABI void findDbgUsers(
- SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V,
- SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
+/// Finds the debug info records describing a value.
+LLVM_ABI void
+findDbgUsers(Value *V,
+ SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
+/// Finds the dbg.values describing a value.
+LLVM_ABI void
+findDbgValues(Value *V,
+ SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
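A hedged migration sketch: callers that previously collected intrinsic users now work purely in terms of DbgVariableRecords:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugProgramInstruction.h"

// Sketch: retarget all debug records describing V to NewV.
void updateDebugUsers(llvm::Value *V, llvm::Value *NewV) {
  llvm::SmallVector<llvm::DbgVariableRecord *> DVRs;
  llvm::findDbgUsers(V, DVRs);
  for (llvm::DbgVariableRecord *DVR : DVRs)
    DVR->replaceVariableLocationOp(V, NewV);
}
```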
/// Find subprogram that is enclosing this scope.
LLVM_ABI DISubprogram *getDISubprogram(const MDNode *Scope);
/// Produce a DebugLoc to use for each dbg.declare that is promoted to a
/// dbg.value.
-LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII);
LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableRecord *DVR);
/// Strip debug info in the module if it exists.
@@ -192,13 +188,6 @@ using AssignmentInstRange =
/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
/// instruction (including by deleting or cloning instructions).
LLVM_ABI AssignmentInstRange getAssignmentInsts(DIAssignID *ID);
-/// Return a range of instructions (typically just one) that perform the
-/// assignment that \p DAI encodes.
-/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
-/// instruction (including by deleting or cloning instructions).
-inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) {
- return getAssignmentInsts(DAI->getAssignID());
-}
inline AssignmentInstRange getAssignmentInsts(const DbgVariableRecord *DVR) {
assert(DVR->isDbgAssign() &&
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ecda6c4..8bfa345 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3717,6 +3717,20 @@ class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> :
IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+class AMDGPUWmmaIntrinsicModsC_MatrixFMT :
+ Intrinsic<
+ [llvm_anyfloat_ty], // %D
+ [
+ llvm_i32_ty, // matrix_a_fmt
+ llvm_anyint_ty, // %A
+ llvm_i32_ty, // matrix_b_fmt
+ llvm_anyint_ty, // %B
+ llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
+ LLVMMatchType<0>, // %C
+ ],
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+>;
+
defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = {
def int_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
@@ -3741,6 +3755,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint
def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT;
def int_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0375f29..967d166 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -331,6 +331,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gf,"m8n16:x2") : !listsplat(llvm_i32_ty, 2),
!eq(gf,"m8n16:x4") : !listsplat(llvm_i32_ty, 4),
+ // stmatrix b8 -> s32 @ m16n8
+ !eq(gf,"m16n8:x1") : !listsplat(llvm_i32_ty, 1),
+ !eq(gf,"m16n8:x2") : !listsplat(llvm_i32_ty, 2),
+ !eq(gf,"m16n8:x4") : !listsplat(llvm_i32_ty, 4),
+
);
}
@@ -403,6 +408,17 @@ class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
!subst("llvm.", "int_", intr));
}
+class STMATRIX_NAME<WMMA_REGS Frag, int Trans> {
+ string intr = "llvm.nvvm.stmatrix.sync.aligned"
+ # "." # Frag.geom
+ # "." # Frag.frag
+ # !if(Trans, ".trans", "")
+ # "." # Frag.ptx_elt_type
+ ;
+ string record = !subst(".", "_",
+ !subst("llvm.", "int_", intr));
+}
+
// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
// Geom: list of supported geometries.
// TypeN: PTX type of the corresponding fragment's element.
@@ -443,6 +459,16 @@ class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
list<string> ops = !foreach(x, ret, x.gft);
}
+class STMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+ list<WMMA_REGS> ret =
+ !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+ !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+ [WMMA_REGS<geom, frag, type>]))))));
+ // Debugging aid for readable representation of the list above.
+ list<string> ops = !foreach(x, ret, x.gft);
+}
+
// Creates list of valid combinations of fragments. This is the main list that
// drives generation of corresponding intrinsics and instructions.
class NVVM_MMA_OPS {
@@ -537,9 +563,18 @@ class NVVM_MMA_OPS {
list<WMMA_REGS> ldmatrix_geom_m8n16_ops = LDMATRIX_OPS<
["m8n16"], ["x1", "x2", "x4"], ["b8x16.b6x16_p32", "b8x16.b4x16_p64"]>.ret;
+ list<WMMA_REGS> stmatrix_b16_ops = STMATRIX_OPS<
+ ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
+
+ list<WMMA_REGS> stmatrix_b8_ops = STMATRIX_OPS<
+ ["m16n8"], ["x1", "x2", "x4"], ["b8"]>.ret;
+
list<WMMA_REGS> all_ldmatrix_ops = !listconcat(ldmatrix_b16_ops,
ldmatrix_geom_m16n16_ops,
ldmatrix_geom_m8n16_ops);
+
+ list<WMMA_REGS> all_stmatrix_ops = !listconcat(stmatrix_b16_ops,
+ stmatrix_b8_ops);
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
@@ -680,6 +715,19 @@ class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
);
}
+// Returns true if the fragment is supported by stmatrix ops;
+// false otherwise.
+class NVVM_STMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
+ string g = frag.geom;
+ string t = frag.ptx_elt_type;
+
+ bit ret = !cond(
+ !and(!eq(g, "m8n8"), !eq(t, "b16")): true,
+ !and(!eq(g, "m16n8"), !eq(t, "b8"), !eq(trans, 1)): true,
+ true: false
+ );
+}
+
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Suffix = !if(sync, "sync_", "")
# mode # "_"
@@ -1969,6 +2017,23 @@ foreach transposed = [0, 1] in {
}
}
+// STMATRIX
+class NVVM_STMATRIX<WMMA_REGS Frag, int Transposed>
+ : Intrinsic<[],
+ !listconcat([llvm_anyptr_ty], Frag.regs),
+ [IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
+ WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ STMATRIX_NAME<Frag, Transposed>.intr>;
+
+foreach transposed = [0, 1] in {
+ foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in {
+ if NVVM_STMATRIX_SUPPORTED<frag, transposed>.ret then {
+ def STMATRIX_NAME<frag, transposed>.record
+ : NVVM_STMATRIX<frag, transposed>;
+ }
+ }
+}
+
// MAPA
let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in {
def int_nvvm_mapa
@@ -2024,9 +2089,7 @@ foreach dim = 1...5 in {
tensor_dim_args, // actual tensor dims
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent,
- ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
- NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
// Intrinsics for TMA Copy with reduction
foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in
@@ -2037,18 +2100,31 @@ foreach dim = 1...5 in {
tensor_dim_args, // actual tensor dims
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
- NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
}
}
+// TMA S2G tile::scatter4
+def int_nvvm_cp_async_bulk_tensor_s2g_tile_scatter4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_shared_ptr_ty, // src_smem_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // dims
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
// TMA Tensor Copy Intrinsics: G2S -> From Global to Shared memory variants
foreach dim = 1...5 in {
defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim);
- foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
+ foreach mode = !if(!ge(dim, 3), ["tile", "im2col", "im2col_w", "im2col_w_128"], ["tile"]) in {
defvar is_im2col = !eq(mode, "im2col");
- defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0);
+ defvar is_im2colw = !or(!eq(mode, "im2col_w"), !eq(mode, "im2col_w_128"));
+
+ // For im2col_w/w128 modes, the num_offsets is always 2.
+ // For im2col mode, the num_offsets is (dim - 2).
+ defvar num_im2col_offsets = !if(is_im2colw, 2, !if(is_im2col, !add(dim, -2), 0));
defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets);
defvar g2s_params = !listconcat(
@@ -2079,11 +2155,60 @@ foreach dim = 1...5 in {
im2col_offsets_args, // im2col offsets
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent,
- ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
+ def int_nvvm_cp_async_bulk_tensor_g2s_cta_ # mode # _ # dim # d :
+ DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_shared_ptr_ty, // dst_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ tensor_dim_args, // actual tensor dims
+ im2col_offsets_args, // im2col offsets
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
}
}
+// TMA copy for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_g2s_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat(
+ [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i16_ty, // cta_mask
+ llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty, // Flag for cta_mask
+ llvm_i1_ty, // Flag for cache_hint
+ llvm_i32_ty], // Flag for cta_group
+ [IntrConvergent,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
+ // Allowed values for cta_group are {0,1,2} i.e [0, 3).
+ Range<ArgIndex<12>, 0, 3>]>;
+
+def int_nvvm_cp_async_bulk_tensor_g2s_cta_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat(
+ [llvm_shared_ptr_ty, // dst_shared_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
+
+// TMA prefetch for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_prefetch_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
// Intrinsics for Prefetch and Prefetchu
let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>] in {
foreach level = ["L1", "L2"] in {
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index f592ff2..c1e4b97 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -43,6 +43,10 @@ def int_wasm_ref_is_null_exn :
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_exnref_ty], [IntrNoMem],
"llvm.wasm.ref.is_null.exn">;
+def int_wasm_ref_test_func
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// Table intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 737610b..0fd5de3 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
- return false;
}
inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
@@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
}
llvm_unreachable(
"Checking invalid f2i/d2i intrinsic for signed int conversion");
- return false;
}
inline APFloat::roundingMode
@@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
return APFloat::rmTowardZero;
}
llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
- return APFloat::roundingMode::Invalid;
}
inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
@@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
@@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
@@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
- return false;
+}
+
+inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return true;
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
+}
+
+inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return true;
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
+}
+
+inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ return APFloat::rmTowardNegative;
+
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return APFloat::rmNearestTiesToEven;
+
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ return APFloat::rmTowardPositive;
+
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
+}
+
+inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
+ if (ShouldFTZ)
+ return DenormalMode::getPreserveSign();
+ return DenormalMode::getIEEE();
}
} // namespace nvvm
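A hedged sketch of how a constant folder might combine the new helpers; the call site is illustrative:

```cpp
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/NVVMIntrinsicUtils.h"

// Sketch: fold rcp(x) as 1/x in the intrinsic's rounding mode, flushing
// a denormal input first when the FTZ variant asks for it.
llvm::APFloat foldRcp(llvm::APFloat Operand, llvm::Intrinsic::ID IID) {
  if (llvm::nvvm::RCPShouldFTZ(IID) && Operand.isDenormal())
    Operand = llvm::APFloat::getZero(Operand.getSemantics(),
                                     Operand.isNegative());
  llvm::APFloat One = llvm::APFloat::getOne(Operand.getSemantics());
  One.divide(Operand, llvm::nvvm::GetRCPRoundingMode(IID));
  return One;
}
```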
diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h
index 0315715..33eda5a 100644
--- a/llvm/include/llvm/IR/PassInstrumentation.h
+++ b/llvm/include/llvm/IR/PassInstrumentation.h
@@ -164,7 +164,7 @@ public:
/// Add a class name to pass name mapping for use by pass instrumentation.
LLVM_ABI void addClassToPassName(StringRef ClassName, StringRef PassName);
- /// Get the pass name for a given pass class name.
+ /// Get the pass name for a given pass class name. Empty if no match found.
LLVM_ABI StringRef getPassNameForClassName(StringRef ClassName);
private:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 50e50a9..27c5d5c 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -822,12 +822,52 @@ template <typename Class> struct bind_ty {
}
};
+/// Check whether the value has the given Class and matches the nested
+/// pattern. Capture it into the provided variable if successful.
+template <typename Class, typename MatchTy> struct bind_and_match_ty {
+ Class *&VR;
+ MatchTy Match;
+
+ bind_and_match_ty(Class *&V, const MatchTy &Match) : VR(V), Match(Match) {}
+
+ template <typename ITy> bool match(ITy *V) const {
+ auto *CV = dyn_cast<Class>(V);
+ if (CV && Match.match(V)) {
+ VR = CV;
+ return true;
+ }
+ return false;
+ }
+};
+
/// Match a value, capturing it if we match.
inline bind_ty<Value> m_Value(Value *&V) { return V; }
inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
+/// Match against the nested pattern, and capture the value if we match.
+template <typename MatchTy>
+inline bind_and_match_ty<Value, MatchTy> m_Value(Value *&V,
+ const MatchTy &Match) {
+ return {V, Match};
+}
+
+/// Match against the nested pattern, and capture the value if we match.
+template <typename MatchTy>
+inline bind_and_match_ty<const Value, MatchTy> m_Value(const Value *&V,
+ const MatchTy &Match) {
+ return {V, Match};
+}
+
/// Match an instruction, capturing it if we match.
inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
+/// Match against the nested pattern, and capture the instruction if we match.
+template <typename MatchTy>
+inline bind_and_match_ty<Instruction, MatchTy>
+m_Instruction(Instruction *&I, const MatchTy &Match) {
+ return {I, Match};
+}
+
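A usage sketch of the capture-and-match overloads, which replace the old two-step of matching and then dyn_casting the bound value:

```cpp
#include "llvm/IR/PatternMatch.h"

// Sketch: bind the Or operand only if it is itself a shift.
bool matchOrOfShl(llvm::Value *V, llvm::Value *&Shl) {
  using namespace llvm::PatternMatch;
  return match(V, m_c_Or(m_Value(Shl, m_Shl(m_Value(), m_Value())),
                         m_Value()));
}
```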
/// Match a unary operator, capturing it if we match.
inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
/// Match a binary operator, capturing it if we match.
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 2e231cf..31801da 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -119,7 +119,6 @@ LLVM_ABI void initializeExpandVariadicsPass(PassRegistry &);
LLVM_ABI void initializeExternalAAWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeFEntryInserterLegacyPass(PassRegistry &);
LLVM_ABI void initializeFinalizeISelPass(PassRegistry &);
-LLVM_ABI void initializeFinalizeMachineBundlesPass(PassRegistry &);
LLVM_ABI void initializeFixIrreduciblePass(PassRegistry &);
LLVM_ABI void initializeFixupStatepointCallerSavedLegacyPass(PassRegistry &);
LLVM_ABI void initializeFlattenCFGLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllIR.h b/llvm/include/llvm/LinkAllIR.h
index ceed784..894a8dd 100644
--- a/llvm/include/llvm/LinkAllIR.h
+++ b/llvm/include/llvm/LinkAllIR.h
@@ -21,6 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Memory.h"
@@ -29,19 +30,16 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
-#include <cstdlib>
namespace {
struct ForceVMCoreLinking {
ForceVMCoreLinking() {
// We must reference VMCore in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
+ // delete it all as dead code, even with whole program optimization.
// This is so that globals in the translation units where these functions
// are defined are forced to be initialized, populating various
// registries.
- if (std::getenv("bar") != (char*) -1)
+ if (llvm::getNonFoldableAlwaysTrue())
return;
llvm::LLVMContext Context;
(void)new llvm::Module("", Context);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index bae7f0d..f82a439 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Support/Valgrind.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -54,14 +55,12 @@ class Triple;
namespace {
struct ForcePassLinking {
ForcePassLinking() {
- // We must reference the passes in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char *)-1)
+ // We must reference the passes in such a way that compilers will not delete
+ // it all as dead code, even with whole program optimization, yet is
+ // effectively a NO-OP. This is so that globals in the translation units
+ // where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
(void)llvm::createAtomicExpandLegacyPass();
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 0322cbe..bfc1175 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -18,9 +18,7 @@
namespace llvm {
-class MCAlignFragment;
class MCFragment;
-class MCLEBFragment;
class MCSymbol;
class MCAssembler;
class MCContext;
@@ -60,6 +58,9 @@ protected: // Can only create subclasses.
MCAssembler *Asm = nullptr;
+ bool AllowAutoPadding = false;
+ bool AllowEnhancedRelaxation = false;
+
public:
MCAsmBackend(const MCAsmBackend &) = delete;
MCAsmBackend &operator=(const MCAsmBackend &) = delete;
@@ -73,11 +74,11 @@ public:
/// Return true if this target might automatically pad instructions and thus
/// need to emit padding enable/disable directives around sensitive code.
- virtual bool allowAutoPadding() const { return false; }
+ bool allowAutoPadding() const { return AllowAutoPadding; }
/// Return true if this target allows an unrelaxable instruction to be
/// emitted into RelaxableFragment and then we can increase its size in a
/// tricky way for optimization.
- virtual bool allowEnhancedRelaxation() const { return false; }
+ bool allowEnhancedRelaxation() const { return AllowEnhancedRelaxation; }
/// lifetime management
virtual void reset() {}
@@ -105,21 +106,6 @@ public:
/// Get information on a fixup kind.
virtual MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const;
- /// Hook to check if extra nop bytes must be inserted for alignment directive.
- /// For some targets this may be necessary in order to support linker
- /// relaxation. The number of bytes to insert are returned in Size.
- virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
- unsigned &Size) {
- return false;
- }
-
- /// Hook which indicates if the target requires a fixup to be generated when
- /// handling an align directive in an executable section
- virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) {
- return false;
- }
-
// Evaluate a fixup, returning std::nullopt to use default handling for
// `Value` and `IsResolved`. Otherwise, returns `IsResolved` with the
// expectation that the hook updates `Value`.
@@ -177,6 +163,10 @@ public:
}
// Defined by linker relaxation targets.
+
+ // Return false to use default handling. Otherwise, set `Size` to the number
+ // of padding bytes.
+ virtual bool relaxAlign(MCFragment &F, unsigned &Size) { return false; }
virtual bool relaxDwarfLineAddr(MCFragment &, bool &WasRelaxed) const {
return false;
}
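relaxAlign subsumes the two hooks removed above. A hedged sketch of a linker-relaxation backend implementing it; the feature test and the getAlignment() accessor are assumptions:

```cpp
// Sketch only: MyAsmBackend is hypothetical.
bool MyAsmBackend::relaxAlign(llvm::MCFragment &F, unsigned &Size) {
  if (!LinkerRelaxEnabled) // assumed backend flag
    return false;          // use default handling
  // Worst-case padding: alignment minus one minimum-size instruction.
  Size = F.getAlignment().value() - 4; // getAlignment() assumed
  return true;
}
```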
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index ade9ee6f..467ad4e 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -36,8 +36,6 @@ class MCCVDefRangeFragment;
class MCCVInlineLineTableFragment;
class MCFragment;
class MCFixup;
-class MCLEBFragment;
-class MCPseudoProbeAddrFragment;
class MCSymbolRefExpr;
class raw_ostream;
class MCAsmBackend;
@@ -123,7 +121,6 @@ private:
bool relaxCVInlineLineTable(MCCVInlineLineTableFragment &DF);
bool relaxCVDefRange(MCCVDefRangeFragment &DF);
bool relaxFill(MCFillFragment &F);
- bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF);
public:
/// Construct a new assembler instance.
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 319e131..2ceeba2 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -54,7 +54,6 @@ class MCObjectStreamer : public MCStreamer {
void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &);
void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
- void emitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
protected:
MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
@@ -73,13 +72,6 @@ public:
MCSymbol *emitCFILabel() override;
void emitCFISections(bool EH, bool Debug) override;
- /// Get a data fragment to write into, creating a new one if the current
- /// fragment is not FT_Data.
- MCFragment *getOrCreateDataFragment();
-
-protected:
- bool changeSectionImpl(MCSection *Section, uint32_t Subsection);
-
public:
void visitUsedSymbol(const MCSymbol &Sym) override;
@@ -88,6 +80,15 @@ public:
/// \name MCStreamer Interface
/// @{
+ // Add a fragment with a variable-size tail and start a new empty fragment.
+ void insert(MCFragment *F);
+
+ // Add a new fragment to the current section without a variable-size tail.
+ void newFragment();
+
+ void appendContents(size_t Num, char Elt);
+ void addFixup(const MCExpr *Value, MCFixupKind Kind);
+
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
virtual void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment &F,
uint64_t Offset);
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 313071e..87a8349 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -91,8 +91,7 @@ private:
bool IsRegistered : 1;
bool IsText : 1;
-
- bool IsVirtual : 1;
+ bool IsBss : 1;
/// Whether the section contains linker-relaxable fragments. If true, the
/// offset between two locations may not be fully resolved.
@@ -113,7 +112,7 @@ protected:
StringRef Name;
SectionVariant Variant;
- MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual,
+ MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss,
MCSymbol *Begin);
// Protected non-virtual dtor prevents destroy through a base class pointer.
~MCSection() {}
@@ -178,9 +177,7 @@ public:
/// Check whether this section is "virtual", that is, it has no actual
/// object file contents.
- bool isVirtualSection() const { return IsVirtual; }
-
- virtual StringRef getVirtualSectionKind() const;
+ bool isBssSection() const { return IsBss; }
};
// Represents a contiguous piece of code or data within a section. Its size is
@@ -207,7 +204,6 @@ public:
FT_SymbolId,
FT_CVInlineLines,
FT_CVDefRange,
- FT_PseudoProbe,
};
private:
@@ -235,11 +231,16 @@ protected:
/// FT_Relaxable, x86-specific
bool AllowAutoPadding : 1;
+ // Track content and fixups for the fixed-size part as fragments are
+ // appended to the section. Once appended, the content is modified only
+ // by applyFixup.
uint32_t ContentStart = 0;
uint32_t ContentEnd = 0;
uint32_t FixupStart = 0;
uint32_t FixupEnd = 0;
+ // Track content and fixups for the optional variable-size tail part,
+ // typically modified during relaxation.
uint32_t VarContentStart = 0;
uint32_t VarContentEnd = 0;
uint32_t VarFixupStart = 0;
@@ -256,6 +257,19 @@ protected:
uint32_t OperandSize;
} relax;
struct {
+ // The alignment to ensure, in bytes.
+ Align Alignment;
+ // The size of the integer (in bytes) of \p Fill.
+ uint8_t FillLen;
+ // If true, fill with target-specific nop instructions.
+ bool EmitNops;
+ // The maximum number of bytes to emit; if the alignment
+ // cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit;
+ // Value to use for filling padding bytes.
+ int64_t Fill;
+ } align;
+ struct {
// True if this is a sleb128, false if uleb128.
bool IsSigned;
// The value this fragment should contain.
@@ -284,10 +298,10 @@ public:
return false;
case MCFragment::FT_Relaxable:
case MCFragment::FT_Data:
+ case MCFragment::FT_Align:
case MCFragment::FT_Dwarf:
case MCFragment::FT_DwarfFrame:
case MCFragment::FT_LEB:
- case MCFragment::FT_PseudoProbe:
case MCFragment::FT_CVInlineLines:
case MCFragment::FT_CVDefRange:
return true;
@@ -329,9 +343,9 @@ public:
bool getAllowAutoPadding() const { return AllowAutoPadding; }
void setAllowAutoPadding(bool V) { AllowAutoPadding = V; }
- // Content-related functions manage parent's storage using ContentStart and
+ //== Content-related functions manage parent's storage using ContentStart and
// ContentEnd.
- void clearContents() { ContentEnd = ContentStart; }
+
// Get a SmallVector reference. The caller should call doneAppending to update
// `ContentEnd`.
SmallVectorImpl<char> &getContentsForAppending() {
@@ -355,7 +369,6 @@ public:
getContentsForAppending().append(Num, Elt);
doneAppending();
}
- LLVM_ABI void setContents(ArrayRef<char> Contents);
MutableArrayRef<char> getContents() {
return MutableArrayRef(getParent()->ContentStorage)
.slice(ContentStart, ContentEnd - ContentStart);
@@ -387,7 +400,6 @@ public:
void clearFixups() { FixupEnd = FixupStart; }
LLVM_ABI void addFixup(MCFixup Fixup);
LLVM_ABI void appendFixups(ArrayRef<MCFixup> Fixups);
- LLVM_ABI void setFixups(ArrayRef<MCFixup> Fixups);
MutableArrayRef<MCFixup> getFixups() {
return MutableArrayRef(getParent()->FixupStorage)
.slice(FixupStart, FixupEnd - FixupStart);
@@ -442,7 +454,45 @@ public:
llvm::copy(Inst, S.begin() + u.relax.OperandStart);
}
+ //== FT_Align functions
+ void makeAlign(Align Alignment, int64_t Fill, uint8_t FillLen,
+ unsigned MaxBytesToEmit) {
+ Kind = FT_Align;
+ u.align.EmitNops = false;
+ u.align.Alignment = Alignment;
+ u.align.Fill = Fill;
+ u.align.FillLen = FillLen;
+ u.align.MaxBytesToEmit = MaxBytesToEmit;
+ }
+
+ Align getAlignment() const {
+ assert(Kind == FT_Align);
+ return u.align.Alignment;
+ }
+ int64_t getAlignFill() const {
+ assert(Kind == FT_Align);
+ return u.align.Fill;
+ }
+ uint8_t getAlignFillLen() const {
+ assert(Kind == FT_Align);
+ return u.align.FillLen;
+ }
+ unsigned getAlignMaxBytesToEmit() const {
+ assert(Kind == FT_Align);
+ return u.align.MaxBytesToEmit;
+ }
+ bool hasAlignEmitNops() const {
+ assert(Kind == FT_Align);
+ return u.align.EmitNops;
+ }
+
//== FT_LEB functions
+ void makeLEB(bool IsSigned, const MCExpr *Value) {
+ assert(Kind == FT_Data);
+ Kind = MCFragment::FT_LEB;
+ u.leb.IsSigned = IsSigned;
+ u.leb.Value = Value;
+ }
const MCExpr &getLEBValue() const {
assert(Kind == FT_LEB);
return *u.leb.Value;
@@ -455,10 +505,6 @@ public:
assert(Kind == FT_LEB);
return u.leb.IsSigned;
}
- void setLEBSigned(bool S) {
- assert(Kind == FT_LEB);
- u.leb.IsSigned = S;
- }
//== FT_DwarfFrame functions
const MCExpr &getDwarfAddrDelta() const {
@@ -487,52 +533,6 @@ protected:
: MCFragment(FType, HasInstructions) {}
};
-class MCAlignFragment : public MCFragment {
- /// Flag to indicate that (optimal) NOPs should be emitted instead
- /// of using the provided value. The exact interpretation of this flag is
- /// target dependent.
- bool EmitNops : 1;
-
- /// The alignment to ensure, in bytes.
- Align Alignment;
-
- /// The size of the integer (in bytes) of \p Value.
- uint8_t FillLen;
-
- /// The maximum number of bytes to emit; if the alignment
- /// cannot be satisfied in this width then this fragment is ignored.
- unsigned MaxBytesToEmit;
-
- /// Value to use for filling padding bytes.
- int64_t Fill;
-
- /// When emitting Nops some subtargets have specific nop encodings.
- const MCSubtargetInfo *STI = nullptr;
-
-public:
- MCAlignFragment(Align Alignment, int64_t Fill, uint8_t FillLen,
- unsigned MaxBytesToEmit)
- : MCFragment(FT_Align, false), EmitNops(false), Alignment(Alignment),
- FillLen(FillLen), MaxBytesToEmit(MaxBytesToEmit), Fill(Fill) {}
-
- Align getAlignment() const { return Alignment; }
- int64_t getFill() const { return Fill; }
- uint8_t getFillLen() const { return FillLen; }
- unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
-
- bool hasEmitNops() const { return EmitNops; }
- void setEmitNops(bool Value, const MCSubtargetInfo *STI) {
- EmitNops = Value;
- this->STI = STI;
- }
-
- const MCSubtargetInfo *getSubtargetInfo() const { return STI; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Align;
- }
-};
-
class MCFillFragment : public MCFragment {
uint8_t ValueSize;
/// Value to use for filling bytes.
@@ -730,22 +730,6 @@ public:
}
};
-class MCPseudoProbeAddrFragment : public MCEncodedFragment {
- /// The expression for the difference of the two symbols that
- /// make up the address delta between two .pseudoprobe directives.
- const MCExpr *AddrDelta;
-
-public:
- MCPseudoProbeAddrFragment(const MCExpr *AddrDelta)
- : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {}
-
- const MCExpr &getAddrDelta() const { return *AddrDelta; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_PseudoProbe;
- }
-};
-
inline MCSection::iterator &MCSection::iterator::operator++() {
F = F->Next;
return *this;
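[Editor's note: MCAlignFragment's state now lives in the fragment union, so an alignment directive can reuse the current fragment instead of allocating a separate node. A sketch of populating one; the call site and x86-style 0x90 fill byte are illustrative only:]

  static void emitAlignTo16(MCObjectStreamer &OS) {
    MCFragment *F = OS.getCurrentFragment();
    // Pad with single 0x90 bytes, emitting at most 16 bytes of padding.
    F->makeAlign(Align(16), /*Fill=*/0x90, /*FillLen=*/1,
                 /*MaxBytesToEmit=*/16);
    OS.newFragment(); // start a fresh FT_Data fragment
  }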
diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h
index 4472a12..f979413a 100644
--- a/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -82,7 +82,6 @@ public:
raw_ostream &OS,
uint32_t Subsection) const override;
bool useCodeAlign() const override;
- StringRef getVirtualSectionKind() const override;
unsigned getOrAssignWinCFISectionID(unsigned *NextID) const {
if (WinCFISectionID == ~0U)
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index f09d305..64a4daf 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -68,10 +68,6 @@ private:
Group.getPointer()->setIsSignature();
}
- // TODO Delete after we stop supporting generation of GNU-style .zdebug_*
- // sections.
- void setSectionName(StringRef Name) { this->Name = Name; }
-
public:
/// Decides whether a '.section' directive should be printed before the
/// section name
@@ -88,7 +84,6 @@ public:
raw_ostream &OS,
uint32_t Subsection) const override;
bool useCodeAlign() const override;
- StringRef getVirtualSectionKind() const override;
bool isUnique() const { return UniqueID != NonUniqueID; }
unsigned getUniqueID() const { return UniqueID; }
diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h
index 9e3f95e..b166397 100644
--- a/llvm/include/llvm/MC/MCSectionGOFF.h
+++ b/llvm/include/llvm/MC/MCSectionGOFF.h
@@ -111,7 +111,7 @@ public:
// Returns the text style for a section. Only defined for ED and PR sections.
GOFF::ESDTextStyle getTextStyle() const {
- assert((isED() || isPR() || isVirtualSection()) && "Expect ED or PR section");
+ assert((isED() || isPR() || isBssSection()) && "Expect ED, PR, or BSS section");
if (isED())
return EDAttributes.TextStyle;
if (isPR())
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 4b91dbc..4bfc8f9 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -259,6 +259,8 @@ class LLVM_ABI MCStreamer {
bool AllowAutoPadding = false;
protected:
+ bool IsObj = false;
+
// Symbol of the current epilog for which we are processing SEH directives.
WinEH::FrameInfo::Epilog *CurrentWinEpilog = nullptr;
@@ -270,6 +272,8 @@ protected:
/// section changes.
virtual void changeSection(MCSection *, uint32_t);
+ void addFragment(MCFragment *F);
+
virtual void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
virtual void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
@@ -308,6 +312,7 @@ public:
virtual void reset();
MCContext &getContext() const { return Context; }
+ bool isObj() const { return IsObj; }
// MCObjectStreamer has an MCAssembler and allows more expression folding at
// parse time.
@@ -425,10 +430,15 @@ public:
}
MCFragment *getCurrentFragment() const {
+ // Ensure consistency with the section stack.
assert(!getCurrentSection().first ||
CurFrag->getParent() == getCurrentSection().first);
+ // Ensure we eagerly allocate an empty fragment after adding a fragment
+ // with a variable-size tail.
+ assert(!CurFrag || CurFrag->getKind() == MCFragment::FT_Data);
return CurFrag;
}
+ size_t getCurFragOffset() const { return getCurrentFragment()->Offset; }
/// Save the current and previous section on the section stack.
void pushSection() {
SectionStack.push_back(
@@ -456,9 +466,6 @@ public:
MCSymbol *endSection(MCSection *Section);
- void insert(MCFragment *F);
- void newFragment();
-
/// Returns the mnemonic for \p MI, if the streamer has access to an
/// instruction printer and returns an empty string otherwise.
virtual StringRef getMnemonic(const MCInst &MI) const { return ""; }
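[Editor's note: the new assertions make getCurrentFragment() a safe append point. A sketch of a temporary section switch around it, assuming DebugSec is some MCSection* supplied by the caller:]

  static void emitInSection(MCStreamer &S, MCSection *DebugSec) {
    S.pushSection();           // save (section, subsection)
    S.switchSection(DebugSec);
    // ... emit data; getCurFragOffset() reports the running offset ...
    S.popSection();            // restore the previous section
  }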
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index a3aa0d9..ced1afd 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
case ELF::ELFOSABI_OPENBSD:
return Triple::OpenBSD;
case ELF::ELFOSABI_CUDA:
+ case ELF::ELFOSABI_CUDA_V2:
return Triple::CUDA;
case ELF::ELFOSABI_AMDGPU_HSA:
return Triple::AMDHSA;
diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h
new file mode 100644
index 0000000..cf4fe20
--- /dev/null
+++ b/llvm/include/llvm/Object/SFrameParser.h
@@ -0,0 +1,48 @@
+//===- SFrameParser.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_SFRAME_H
+#define LLVM_OBJECT_SFRAME_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/SFrame.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+
+namespace llvm {
+namespace object {
+
+template <endianness E> class SFrameParser {
+public:
+ static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents);
+
+ const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; }
+ const sframe::Header<E> &getHeader() const { return Header; }
+
+ bool usesFixedRAOffset() const {
+ return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle;
+ }
+ bool usesFixedFPOffset() const {
+ return false; // Not used in any currently defined ABI.
+ }
+
+private:
+ ArrayRef<uint8_t> Data;
+ const sframe::Header<E> &Header;
+
+ SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header)
+ : Data(Data), Header(Header) {}
+};
+
+extern template class SFrameParser<endianness::big>;
+extern template class SFrameParser<endianness::little>;
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_SFRAME_H
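[Editor's note: a usage sketch for the new parser, assuming Data holds the raw contents of an .sframe section from a little-endian object file:]

  static Error inspectSFrame(ArrayRef<uint8_t> Data) {
    auto ParserOrErr =
        object::SFrameParser<endianness::little>::create(Data);
    if (!ParserOrErr)
      return ParserOrErr.takeError();
    // AMD64 is currently the only defined ABI with a fixed RA offset.
    if (ParserOrErr->usesFixedRAOffset())
      outs() << "fixed return-address offset\n";
    return Error::success();
  }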
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 732fdc7..bee2106 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
+MACHINE_FUNCTION_PASS("finalizebundle-test", FinalizeBundleTestPass())
MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass())
MACHINE_FUNCTION_PASS("init-undef", InitUndefPass())
MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
diff --git a/llvm/include/llvm/Support/AlwaysTrue.h b/llvm/include/llvm/Support/AlwaysTrue.h
new file mode 100644
index 0000000..b696856
--- /dev/null
+++ b/llvm/include/llvm/Support/AlwaysTrue.h
@@ -0,0 +1,25 @@
+//===--- AlwaysTrue.h - Helper for opaque truthy values --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ALWAYS_TRUE_H
+#define LLVM_SUPPORT_ALWAYS_TRUE_H
+
+#include <cstdlib>
+
+namespace llvm {
+inline bool getNonFoldableAlwaysTrue() {
+ // Some parts of the codebase require a "constant true value" used as a
+ // predicate. These cases require that even with LTO and static linking,
+ // it's not possible for the compiler to fold the value. As compilers
+ // aren't smart enough to know that getenv() never returns -1, this will do
+ // the job.
+ return std::getenv("LLVM_IGNORED_ENV_VAR") != (char *)-1;
+}
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_ALWAYS_TRUE_H
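[Editor's note: intended use, with an invented side-effecting callee:]

  #include "llvm/Support/AlwaysTrue.h"

  void sideEffectForTesting(); // hypothetical

  void keepAlive() {
    // Always taken at run time, yet the optimizer cannot fold the branch,
    // so the call below survives LTO and static linking.
    if (llvm::getNonFoldableAlwaysTrue())
      sideEffectForTesting();
  }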
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 7577792..b65a63b 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -78,6 +78,8 @@ def : GINodeEquiv<G_XOR, xor>;
def : GINodeEquiv<G_SHL, shl>;
def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
+def : GINodeEquiv<G_ABDS, abds>;
+def : GINodeEquiv<G_ABDU, abdu>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 59e8117..8e83b046 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -276,14 +276,14 @@ LLVM_ABI bool isX18ReservedByDefault(const Triple &TT);
// For a given set of feature names, which can be either target-features, or
// fmv-features metadata, expand their dependencies and then return a bitmask
// corresponding to the entries of AArch64::FeatPriorities.
-LLVM_ABI uint64_t getFMVPriority(ArrayRef<StringRef> Features);
+LLVM_ABI APInt getFMVPriority(ArrayRef<StringRef> Features);
// For a given set of FMV feature names, expand their dependencies and then
// return a bitmask corresponding to the entries of AArch64::CPUFeatures.
// The values in CPUFeatures are not bitmasks themselves, they are sequential
// (0, 1, 2, 3, ...). The resulting bitmask is used at runtime to test whether
// a certain FMV feature is available on the host.
-LLVM_ABI uint64_t getCpuSupportsMask(ArrayRef<StringRef> Features);
+LLVM_ABI APInt getCpuSupportsMask(ArrayRef<StringRef> Features);
LLVM_ABI void PrintSupportedExtensions();
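[Editor's note: since the result is now an APInt, callers compare priorities with APInt's unsigned predicates rather than built-in integer operators. A sketch; the helper and feature names are illustrative:]

  static bool hasHigherPriority(ArrayRef<StringRef> A, ArrayRef<StringRef> B) {
    // Assumes getFMVPriority returns equal-width masks, so ugt() is valid.
    return AArch64::getFMVPriority(A).ugt(AArch64::getFMVPriority(B));
  }
  // e.g. hasHigherPriority({"sve2"}, {"sve"})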
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index bb79d25..3f5f427 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -325,7 +325,6 @@ LLVM_ABI void salvageDebugInfo(Instruction &I);
/// Mark undef if salvaging cannot be completed.
LLVM_ABI void
salvageDebugInfoForDbgValues(Instruction &I,
- ArrayRef<DbgVariableIntrinsic *> Insns,
ArrayRef<DbgVariableRecord *> DPInsns);
/// Given an instruction \p I and DIExpression \p DIExpr operating on
diff --git a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
index f288bdf..e0cdcf8 100644
--- a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
+++ b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
@@ -57,7 +57,6 @@ struct AllocaInfo {
struct StackInfo {
MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
- SmallVector<Instruction *, 4> UnrecognizedLifetimes;
SmallVector<Instruction *, 8> RetVec;
bool CallsReturnTwice = false;
};