aboutsummaryrefslogtreecommitdiff
path: root/llvm/include/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/include/llvm')
-rw-r--r--llvm/include/llvm/ADT/CombinationGenerator.h3
-rw-r--r--llvm/include/llvm/ADT/EquivalenceClasses.h8
-rw-r--r--llvm/include/llvm/ADT/STLForwardCompat.h22
-rw-r--r--llvm/include/llvm/ADT/StringTable.h9
-rw-r--r--llvm/include/llvm/Analysis/IR2Vec.h9
-rw-r--r--llvm/include/llvm/Analysis/IVDescriptors.h3
-rw-r--r--llvm/include/llvm/Analysis/MemoryProfileInfo.h6
-rw-r--r--llvm/include/llvm/Analysis/VectorUtils.h5
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h1
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h1
-rw-r--r--llvm/include/llvm/BinaryFormat/SFrame.h28
-rw-r--r--llvm/include/llvm/BinaryFormat/SFrameConstants.def39
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h23
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h14
-rw-r--r--llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h4
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h5
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h34
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMP.h4
-rw-r--r--llvm/include/llvm/IR/CallingConv.h8
-rw-r--r--llvm/include/llvm/IR/DebugInfo.h34
-rw-r--r--llvm/include/llvm/IR/DebugInfoMetadata.h3
-rw-r--r--llvm/include/llvm/IR/FixedMetadataKinds.def1
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAMDGPU.td21
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td143
-rw-r--r--llvm/include/llvm/IR/IntrinsicsSPIRV.td7
-rw-r--r--llvm/include/llvm/IR/Metadata.h9
-rw-r--r--llvm/include/llvm/IR/NVVMIntrinsicUtils.h83
-rw-r--r--llvm/include/llvm/IR/OptBisect.h35
-rw-r--r--llvm/include/llvm/IR/PassManager.h16
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.h12
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.td2
-rw-r--r--llvm/include/llvm/MC/MCAsmBackend.h28
-rw-r--r--llvm/include/llvm/MC/MCAssembler.h11
-rw-r--r--llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h12
-rw-r--r--llvm/include/llvm/MC/MCELFStreamer.h3
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h42
-rw-r--r--llvm/include/llvm/MC/MCSection.h141
-rw-r--r--llvm/include/llvm/MC/MCSectionCOFF.h1
-rw-r--r--llvm/include/llvm/MC/MCSectionELF.h5
-rw-r--r--llvm/include/llvm/MC/MCSectionGOFF.h2
-rw-r--r--llvm/include/llvm/MC/MCStreamer.h21
-rw-r--r--llvm/include/llvm/Object/ELFObjectFile.h8
-rw-r--r--llvm/include/llvm/Object/SFrameParser.h48
-rw-r--r--llvm/include/llvm/Pass.h4
-rw-r--r--llvm/include/llvm/Passes/CodeGenPassBuilder.h2
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td20
-rw-r--r--llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td2
-rw-r--r--llvm/include/llvm/TargetParser/Host.h2
-rw-r--r--llvm/include/llvm/TargetParser/Triple.h122
-rw-r--r--llvm/include/llvm/Transforms/Utils/Local.h1
-rw-r--r--llvm/include/llvm/Transforms/Utils/SSAUpdater.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h1
52 files changed, 740 insertions, 332 deletions
diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h
index 6100aa9..bbdbd9b 100644
--- a/llvm/include/llvm/ADT/CombinationGenerator.h
+++ b/llvm/include/llvm/ADT/CombinationGenerator.h
@@ -118,10 +118,9 @@ public:
: VariablesChoices(VariablesChoices_) {
#ifndef NDEBUG
assert(!VariablesChoices.empty() && "There should be some variables.");
- llvm::for_each(VariablesChoices, [](ArrayRef<choice_type> VariableChoices) {
+ for (ArrayRef<choice_type> VariableChoices : VariablesChoices)
assert(!VariableChoices.empty() &&
"There must always be some choice, at least a placeholder one.");
- });
#endif
}
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index b1009f8..1a2331c 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -218,12 +218,12 @@ public:
/// insert - Insert a new value into the union/find set, ignoring the request
/// if the value already exists.
const ECValue &insert(const ElemTy &Data) {
- auto I = TheMapping.insert({Data, nullptr});
- if (!I.second)
- return *I.first->second;
+ auto [I, Inserted] = TheMapping.try_emplace(Data);
+ if (!Inserted)
+ return *I->second;
auto *ECV = new (ECValueAllocator) ECValue(Data);
- I.first->second = ECV;
+ I->second = ECV;
Members.push_back(ECV);
return *ECV;
}
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 7bd2c87..81b9a68 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -55,21 +55,13 @@ using type_identity_t // NOLINT(readability-identifier-naming)
// TODO: Remove this in favor of std::optional<T>::transform once we switch to
// C++23.
-template <typename T, typename Function>
-auto transformOptional(const std::optional<T> &O, const Function &F)
- -> std::optional<decltype(F(*O))> {
- if (O)
- return F(*O);
- return std::nullopt;
-}
-
-// TODO: Remove this in favor of std::optional<T>::transform once we switch to
-// C++23.
-template <typename T, typename Function>
-auto transformOptional(std::optional<T> &&O, const Function &F)
- -> std::optional<decltype(F(*std::move(O)))> {
- if (O)
- return F(*std::move(O));
+template <typename Optional, typename Function,
+ typename Value = typename llvm::remove_cvref_t<Optional>::value_type>
+std::optional<std::invoke_result_t<Function, Value>>
+transformOptional(Optional &&O, Function &&F) {
+ if (O) {
+ return F(*std::forward<Optional>(O));
+ }
return std::nullopt;
}
diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h
index c089a07..575b3c9 100644
--- a/llvm/include/llvm/ADT/StringTable.h
+++ b/llvm/include/llvm/ADT/StringTable.h
@@ -118,6 +118,13 @@ public:
constexpr Iterator(const Iterator &RHS) = default;
constexpr Iterator(Iterator &&RHS) = default;
+ Iterator &operator=(const Iterator &RHS) {
+ Table = RHS.Table;
+ O = RHS.O;
+ S = RHS.S;
+ return *this;
+ }
+
bool operator==(const Iterator &RHS) const {
assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
return O == RHS.O;
@@ -132,6 +139,8 @@ public:
O = O.value() + (*Table)[O].size() + 1;
return *this;
}
+
+ Offset offset() const { return O; }
};
constexpr Iterator begin() const { return Iterator(*this, 0); }
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 3d7edf0..d87457c 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -170,6 +170,10 @@ public:
unsigned getDimension() const;
size_t size() const;
+ static size_t expectedSize() {
+ return MaxOpcodes + MaxTypeIDs + MaxOperandKinds;
+ }
+
/// Helper function to get vocabulary key for a given Opcode
static StringRef getVocabKeyForOpcode(unsigned Opcode);
@@ -182,6 +186,11 @@ public:
/// Helper function to classify an operand into OperandKind
static OperandKind getOperandKind(const Value *Op);
+ /// Helpers to return the IDs of a given Opcode, TypeID, or OperandKind
+ static unsigned getNumericID(unsigned Opcode);
+ static unsigned getNumericID(Type::TypeID TypeID);
+ static unsigned getNumericID(const Value *Op);
+
/// Accessors to get the embedding for a given entity.
const ir2vec::Embedding &operator[](unsigned Opcode) const;
const ir2vec::Embedding &operator[](Type::TypeID TypeId) const;
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index b985292..1dc7320 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
+ FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
+ FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMinimumNum, ///< FP min with llvm.minimumnum semantics
@@ -250,6 +252,7 @@ public:
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+ Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
}
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index b042a71..571caf9 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -102,6 +102,12 @@ private:
// The maximum size of a cold allocation context, from the profile summary.
uint64_t MaxColdSize;
+ // Tracks whether we have built the Trie from existing MD_memprof metadata. We
+ // apply different heuristics for determining whether to discard non-cold
+ // contexts when rebuilding as we have lost information available during the
+ // original profile match.
+ bool BuiltFromExistingMetadata = false;
+
void deleteTrieNode(CallStackTrieNode *Node) {
if (!Node)
return;
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index af1e0d7..9a2773c 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -24,6 +24,7 @@
namespace llvm {
class TargetLibraryInfo;
+class IntrinsicInst;
/// The Vector Function Database.
///
@@ -188,6 +189,10 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);
+/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
+/// factor.
+LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);
+
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index c7e4bdf..a2311d2 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -181,6 +181,7 @@ enum Kind {
kw_amdgpu_cs_chain_preserve,
kw_amdgpu_kernel,
kw_amdgpu_gfx,
+ kw_amdgpu_gfx_whole_wave,
kw_tailcc,
kw_m68k_rtdcc,
kw_graalcc,
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6bf2e17..e4f82ad 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1159,6 +1159,7 @@ enum : unsigned {
SHT_LLVM_OFFLOADING = 0x6fff4c0b, // LLVM device offloading data.
SHT_LLVM_LTO = 0x6fff4c0c, // .llvm.lto for fat LTO.
SHT_LLVM_JT_SIZES = 0x6fff4c0d, // LLVM jump tables sizes.
+ SHT_LLVM_CFI_JUMP_TABLE = 0x6fff4c0e, // LLVM CFI jump table.
// Android's experimental support for SHT_RELR sections.
// https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets.
diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h
index 16d3b16..98dbe38 100644
--- a/llvm/include/llvm/BinaryFormat/SFrame.h
+++ b/llvm/include/llvm/BinaryFormat/SFrame.h
@@ -15,33 +15,36 @@
#ifndef LLVM_BINARYFORMAT_SFRAME_H
#define LLVM_BINARYFORMAT_SFRAME_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Endian.h"
-namespace llvm::sframe {
+namespace llvm {
+
+template <typename T> struct EnumEntry;
+
+namespace sframe {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
constexpr uint16_t Magic = 0xdee2;
enum class Version : uint8_t {
- V1 = 1,
- V2 = 2,
+#define HANDLE_SFRAME_VERSION(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
};
enum class Flags : uint8_t {
- FDESorted = 0x01,
- FramePointer = 0x02,
- FDEFuncStartPCRel = 0x04,
+#define HANDLE_SFRAME_FLAG(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
V2AllFlags = FDESorted | FramePointer | FDEFuncStartPCRel,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xff),
};
enum class ABI : uint8_t {
- AArch64EndianBig = 1,
- AArch64EndianLittle = 2,
- AMD64EndianLittle = 3,
+#define HANDLE_SFRAME_ABI(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
};
/// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info.
@@ -160,6 +163,11 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>;
template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>;
template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>;
-} // namespace llvm::sframe
+ArrayRef<EnumEntry<Version>> getVersions();
+ArrayRef<EnumEntry<Flags>> getFlags();
+ArrayRef<EnumEntry<ABI>> getABIs();
+
+} // namespace sframe
+} // namespace llvm
#endif // LLVM_BINARYFORMAT_SFRAME_H
diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
new file mode 100644
index 0000000..643b15f
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
@@ -0,0 +1,39 @@
+//===- SFrameConstants.def --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \
+ defined(HANDLE_SFRAME_ABI))
+#error "Missing HANDLE_SFRAME definition"
+#endif
+
+#ifndef HANDLE_SFRAME_VERSION
+#define HANDLE_SFRAME_VERSION(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_FLAG
+#define HANDLE_SFRAME_FLAG(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_ABI
+#define HANDLE_SFRAME_ABI(CODE, NAME)
+#endif
+
+HANDLE_SFRAME_VERSION(0x01, V1)
+HANDLE_SFRAME_VERSION(0x02, V2)
+
+HANDLE_SFRAME_FLAG(0x01, FDESorted)
+HANDLE_SFRAME_FLAG(0x02, FramePointer)
+HANDLE_SFRAME_FLAG(0x04, FDEFuncStartPCRel)
+
+HANDLE_SFRAME_ABI(0x01, AArch64EndianBig)
+HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle)
+HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle)
+
+#undef HANDLE_SFRAME_VERSION
+#undef HANDLE_SFRAME_FLAG
+#undef HANDLE_SFRAME_ABI
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 31f1197b..da82904 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -700,18 +700,19 @@ public:
/// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant,
/// return an expression that implements it by multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
- MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const;
+ MachineInstr *buildUDivOrURemUsingMul(MachineInstr &MI) const;
/// Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
- bool matchUDivorURemByConst(MachineInstr &MI) const;
- void applyUDivorURemByConst(MachineInstr &MI) const;
-
- /// Given an G_SDIV \p MI expressing a signed divide by constant, return an
- /// expression that implements it by multiplying by a magic number.
- /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
- MachineInstr *buildSDivUsingMul(MachineInstr &MI) const;
- /// Combine G_SDIV by constant into a multiply by magic constant.
- bool matchSDivByConst(MachineInstr &MI) const;
- void applySDivByConst(MachineInstr &MI) const;
+ bool matchUDivOrURemByConst(MachineInstr &MI) const;
+ void applyUDivOrURemByConst(MachineInstr &MI) const;
+
+ /// Given an G_SDIV \p MI or G_SREM \p MI expressing a signed divide by
+ /// constant, return an expression that implements it by multiplying by a
+ /// magic number. Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's
+ /// Guide".
+ MachineInstr *buildSDivOrSRemUsingMul(MachineInstr &MI) const;
+ /// Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
+ bool matchSDivOrSRemByConst(MachineInstr &MI) const;
+ void applySDivOrSRemByConst(MachineInstr &MI) const;
/// Given an G_SDIV \p MI expressing a signed divided by a pow2 constant,
/// return expressions that implements it by shifting.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index da73238..490d1a3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -103,6 +103,20 @@ public:
/// \return The known alignment for the pointer-like value \p R.
Align computeKnownAlignment(Register R, unsigned Depth = 0);
+ /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+ /// that are all less than the element bit-width of the shift node, return the
+ /// valid constant range.
+ std::optional<ConstantRange>
+ getValidShiftAmountRange(Register R, const APInt &DemandedElts,
+ unsigned Depth);
+
+ /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+ /// that are all less than the element bit-width of the shift node, return the
+ /// minimum possible value.
+ std::optional<uint64_t> getValidMinimumShiftAmount(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
/// Determine which floating-point classes are valid for \p V, and return them
/// in KnownFPClass bit sets.
///
diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index aef9190..7940752 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -39,8 +39,8 @@ class MachineBlockFrequencyInfo {
public:
LLVM_ABI MachineBlockFrequencyInfo(); // Legacy pass manager only.
LLVM_ABI explicit MachineBlockFrequencyInfo(
- MachineFunction &F, MachineBranchProbabilityInfo &MBPI,
- MachineLoopInfo &MLI);
+ const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+ const MachineLoopInfo &MLI);
LLVM_ABI MachineBlockFrequencyInfo(MachineBlockFrequencyInfo &&);
LLVM_ABI ~MachineBlockFrequencyInfo();
diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 98a60c9..1d954cf 100644
--- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -46,6 +46,11 @@ public:
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &FAM);
};
+class FreeMachineFunctionPass : public PassInfoMixin<FreeMachineFunctionPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_MachineFunctionAnalysis
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 72594c7..1a548a5 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3201,11 +3201,15 @@ public:
/// Lower an interleaved load to target specific intrinsics. Return
/// true on success.
///
- /// \p LI is the vector load instruction.
+ /// \p Load is the vector load instruction. Can be either a plain load
+ /// instruction or a vp.load intrinsic.
+ /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+ /// component being interwoven) mask. Can be nullptr, in which case the
+ /// result is unconditional.
/// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
/// \p Indices is the corresponding indices for each shufflevector.
/// \p Factor is the interleave factor.
- virtual bool lowerInterleavedLoad(LoadInst *LI,
+ virtual bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
unsigned Factor) const {
@@ -3223,17 +3227,6 @@ public:
return false;
}
- /// Lower an interleaved load to target specific intrinsics. Return
- /// true on success.
- ///
- /// \p Load is a vp.load instruction.
- /// \p Mask is a mask value
- /// \p DeinterleaveRes is a list of deinterleaved results.
- virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveRes) const {
- return false;
- }
-
/// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
@@ -3251,10 +3244,9 @@ public:
///
/// \p Load is the accompanying load instruction. Can be either a plain load
/// instruction or a vp.load intrinsic.
- /// \p DeinterleaveValues contains the deinterleaved values.
- virtual bool
- lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveValues) const {
+ /// \p DI represents the deinterleaveN intrinsic.
+ virtual bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
+ IntrinsicInst *DI) const {
return false;
}
@@ -3262,10 +3254,14 @@ public:
/// Return true on success. Currently only supports
/// llvm.vector.interleave{2,3,5,7}
///
- /// \p SI is the accompanying store instruction
+ /// \p Store is the accompanying store instruction. Can be either a plain
+ /// store or a vp.store intrinsic.
+ /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+ /// component being interwoven) mask. Can be nullptr, in which case the
+ /// result is unconditional.
/// \p InterleaveValues contains the interleaved values.
virtual bool
- lowerInterleaveIntrinsicToStore(StoreInst *SI,
+ lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleaveValues) const {
return false;
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h
index d44c333..9d0a554 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h
@@ -51,13 +51,17 @@ static constexpr inline bool canHaveIterator(Clause C) {
// Can clause C create a private copy of a variable.
static constexpr inline bool isPrivatizingClause(Clause C) {
switch (C) {
+ case OMPC_detach:
case OMPC_firstprivate:
+ // TODO case OMPC_induction:
case OMPC_in_reduction:
+ case OMPC_is_device_ptr:
case OMPC_lastprivate:
case OMPC_linear:
case OMPC_private:
case OMPC_reduction:
case OMPC_task_reduction:
+ case OMPC_use_device_ptr:
return true;
default:
return false;
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index d68491e..ef761eb 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -284,6 +284,9 @@ namespace CallingConv {
RISCV_VLSCall_32768 = 122,
RISCV_VLSCall_65536 = 123,
+ // Calling convention for AMDGPU whole wave functions.
+ AMDGPU_Gfx_WholeWave = 124,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
@@ -294,8 +297,13 @@ namespace CallingConv {
/// directly or indirectly via a call-like instruction.
constexpr bool isCallableCC(CallingConv::ID CC) {
switch (CC) {
+ // Called with special intrinsics:
+ // llvm.amdgcn.cs.chain
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
+ // llvm.amdgcn.call.whole.wave
+ case CallingConv::AMDGPU_Gfx_WholeWave:
+ // Hardware entry points:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_GS:
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index 77cee87..c529a86 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -39,30 +39,26 @@ class DbgVariableRecord;
class Instruction;
class Module;
-/// Finds dbg.declare intrinsics declaring local variables as living in the
+/// Finds dbg.declare records declaring local variables as living in the
/// memory that 'V' points to.
-LLVM_ABI TinyPtrVector<DbgDeclareInst *> findDbgDeclares(Value *V);
-/// As above, for DVRDeclares.
LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRDeclares(Value *V);
/// As above, for DVRValues.
LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRValues(Value *V);
-/// Finds the llvm.dbg.value intrinsics describing a value.
-LLVM_ABI void findDbgValues(
- SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V,
- SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
-
-/// Finds the debug info intrinsics describing a value.
-LLVM_ABI void findDbgUsers(
- SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V,
- SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
+/// Finds the debug info records describing a value.
+LLVM_ABI void
+findDbgUsers(Value *V,
+ SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
+/// Finds the dbg.values describing a value.
+LLVM_ABI void
+findDbgValues(Value *V,
+ SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
/// Find subprogram that is enclosing this scope.
LLVM_ABI DISubprogram *getDISubprogram(const MDNode *Scope);
/// Produce a DebugLoc to use for each dbg.declare that is promoted to a
/// dbg.value.
-LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII);
LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableRecord *DVR);
/// Strip debug info in the module if it exists.
@@ -115,8 +111,7 @@ public:
LLVM_ABI void processVariable(DILocalVariable *DVI);
/// Process debug info location.
LLVM_ABI void processLocation(const Module &M, const DILocation *Loc);
- /// Process a DbgRecord (e.g, treat a DbgVariableRecord like a
- /// DbgVariableIntrinsic).
+ /// Process a DbgRecord.
LLVM_ABI void processDbgRecord(const Module &M, const DbgRecord &DR);
/// Process subprogram.
@@ -193,13 +188,6 @@ using AssignmentInstRange =
/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
/// instruction (including by deleting or cloning instructions).
LLVM_ABI AssignmentInstRange getAssignmentInsts(DIAssignID *ID);
-/// Return a range of instructions (typically just one) that perform the
-/// assignment that \p DAI encodes.
-/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
-/// instruction (including by deleting or cloning instructions).
-inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) {
- return getAssignmentInsts(DAI->getAssignID());
-}
inline AssignmentInstRange getAssignmentInsts(const DbgVariableRecord *DVR) {
assert(DVR->isDbgAssign() &&
@@ -290,8 +278,6 @@ struct VarRecord {
DILocalVariable *Var;
DILocation *DL;
- VarRecord(DbgVariableIntrinsic *DVI)
- : Var(DVI->getVariable()), DL(getDebugValueLoc(DVI)) {}
VarRecord(DbgVariableRecord *DVR)
: Var(DVR->getVariable()), DL(getDebugValueLoc(DVR)) {}
VarRecord(DILocalVariable *Var, DILocation *DL) : Var(Var), DL(DL) {}
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 9345f95..f1f0c189 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -66,7 +66,6 @@ namespace dwarf {
enum Tag : uint16_t;
}
-class DbgVariableIntrinsic;
class DbgVariableRecord;
LLVM_ABI extern cl::opt<bool> EnableFSDiscriminator;
@@ -4613,7 +4612,6 @@ class DebugVariable {
LLVM_ABI static const FragmentInfo DefaultFragment;
public:
- LLVM_ABI DebugVariable(const DbgVariableIntrinsic *DII);
LLVM_ABI DebugVariable(const DbgVariableRecord *DVR);
DebugVariable(const DILocalVariable *Var,
@@ -4681,7 +4679,6 @@ template <> struct DenseMapInfo<DebugVariable> {
/// information).
class DebugVariableAggregate : public DebugVariable {
public:
- LLVM_ABI DebugVariableAggregate(const DbgVariableIntrinsic *DVI);
DebugVariableAggregate(const DebugVariable &V)
: DebugVariable(V.getVariable(), std::nullopt, V.getInlinedAt()) {}
};
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index df572e8..90276ea 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -53,3 +53,4 @@ LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
+LLVM_FIXED_MD_KIND(MD_callee_type, "callee_type", 42)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d8fda0e..8bfa345 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3572,6 +3572,12 @@ def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">,
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">,
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+
+def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">,
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
@@ -3711,6 +3717,20 @@ class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> :
IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+class AMDGPUWmmaIntrinsicModsC_MatrixFMT :
+ Intrinsic<
+ [llvm_anyfloat_ty], // %D
+ [
+ llvm_i32_ty, // matrix_a_fmt
+ llvm_anyint_ty, // %A
+ llvm_i32_ty, // matrix_b_fmt
+ llvm_anyint_ty, // %B
+ llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
+ LLVMMatchType<0>, // %C
+ ],
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+>;
+
defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = {
def int_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
@@ -3735,6 +3755,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint
def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT;
def int_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0375f29..967d166 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -331,6 +331,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gf,"m8n16:x2") : !listsplat(llvm_i32_ty, 2),
!eq(gf,"m8n16:x4") : !listsplat(llvm_i32_ty, 4),
+ // stmatrix b8 -> s32 @ m16n8
+ !eq(gf,"m16n8:x1") : !listsplat(llvm_i32_ty, 1),
+ !eq(gf,"m16n8:x2") : !listsplat(llvm_i32_ty, 2),
+ !eq(gf,"m16n8:x4") : !listsplat(llvm_i32_ty, 4),
+
);
}
@@ -403,6 +408,17 @@ class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
!subst("llvm.", "int_", intr));
}
+class STMATRIX_NAME<WMMA_REGS Frag, int Trans> {
+ string intr = "llvm.nvvm.stmatrix.sync.aligned"
+ # "." # Frag.geom
+ # "." # Frag.frag
+ # !if(Trans, ".trans", "")
+ # "." # Frag.ptx_elt_type
+ ;
+ string record = !subst(".", "_",
+ !subst("llvm.", "int_", intr));
+}
+
// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
// Geom: list of supported geometries.
// TypeN: PTX type of the corresponding fragment's element.
@@ -443,6 +459,16 @@ class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
list<string> ops = !foreach(x, ret, x.gft);
}
+class STMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+ list<WMMA_REGS> ret =
+ !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+ !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+ [WMMA_REGS<geom, frag, type>]))))));
+ // Debugging aid for readable representation of the list above.
+ list<string> ops = !foreach(x, ret, x.gft);
+}
+
// Creates list of valid combinations of fragments. This is the main list that
// drives generation of corresponding intrinsics and instructions.
class NVVM_MMA_OPS {
@@ -537,9 +563,18 @@ class NVVM_MMA_OPS {
list<WMMA_REGS> ldmatrix_geom_m8n16_ops = LDMATRIX_OPS<
["m8n16"], ["x1", "x2", "x4"], ["b8x16.b6x16_p32", "b8x16.b4x16_p64"]>.ret;
+ list<WMMA_REGS> stmatrix_b16_ops = STMATRIX_OPS<
+ ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
+
+ list<WMMA_REGS> stmatrix_b8_ops = STMATRIX_OPS<
+ ["m16n8"], ["x1", "x2", "x4"], ["b8"]>.ret;
+
list<WMMA_REGS> all_ldmatrix_ops = !listconcat(ldmatrix_b16_ops,
ldmatrix_geom_m16n16_ops,
ldmatrix_geom_m8n16_ops);
+
+ list<WMMA_REGS> all_stmatrix_ops = !listconcat(stmatrix_b16_ops,
+ stmatrix_b8_ops);
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
@@ -680,6 +715,19 @@ class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
);
}
+// Returns true if the given fragment is supported by stmatrix ops;
+// false otherwise.
+class NVVM_STMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
+ string g = frag.geom;
+ string t = frag.ptx_elt_type;
+
+ bit ret = !cond(
+ !and(!eq(g, "m8n8"), !eq(t, "b16")): true,
+ !and(!eq(g, "m16n8"), !eq(t, "b8"), !eq(trans, 1)): true,
+ true: false
+ );
+}
+
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Suffix = !if(sync, "sync_", "")
# mode # "_"
@@ -1969,6 +2017,23 @@ foreach transposed = [0, 1] in {
}
}
+// STMATRIX
+class NVVM_STMATRIX<WMMA_REGS Frag, int Transposed>
+ : Intrinsic<[],
+ !listconcat([llvm_anyptr_ty], Frag.regs),
+ [IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
+ WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ STMATRIX_NAME<Frag, Transposed>.intr>;
+
+foreach transposed = [0, 1] in {
+ foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in {
+ if NVVM_STMATRIX_SUPPORTED<frag, transposed>.ret then {
+ def STMATRIX_NAME<frag, transposed>.record
+ : NVVM_STMATRIX<frag, transposed>;
+ }
+ }
+}
+
// MAPA
let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in {
def int_nvvm_mapa
@@ -2024,9 +2089,7 @@ foreach dim = 1...5 in {
tensor_dim_args, // actual tensor dims
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent,
- ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
- NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
// Intrinsics for TMA Copy with reduction
foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in
@@ -2037,18 +2100,31 @@ foreach dim = 1...5 in {
tensor_dim_args, // actual tensor dims
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
- NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
}
}
+// TMA S2G tile::scatter4
+def int_nvvm_cp_async_bulk_tensor_s2g_tile_scatter4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_shared_ptr_ty, // src_smem_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // dims
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
// TMA Tensor Copy Intrinsics: G2S -> From Global to Shared memory variants
foreach dim = 1...5 in {
defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim);
- foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
+ foreach mode = !if(!ge(dim, 3), ["tile", "im2col", "im2col_w", "im2col_w_128"], ["tile"]) in {
defvar is_im2col = !eq(mode, "im2col");
- defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0);
+ defvar is_im2colw = !or(!eq(mode, "im2col_w"), !eq(mode, "im2col_w_128"));
+
+ // For im2col_w/w128 modes, the num_offsets is always 2.
+ // For im2col mode, the num_offsets is (dim - 2).
+ defvar num_im2col_offsets = !if(is_im2colw, 2, !if(is_im2col, !add(dim, -2), 0));
defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets);
defvar g2s_params = !listconcat(
@@ -2079,11 +2155,60 @@ foreach dim = 1...5 in {
im2col_offsets_args, // im2col offsets
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent,
- ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
+ def int_nvvm_cp_async_bulk_tensor_g2s_cta_ # mode # _ # dim # d :
+ DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_shared_ptr_ty, // dst_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ tensor_dim_args, // actual tensor dims
+ im2col_offsets_args, // im2col offsets
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
}
}
+// TMA copy for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_g2s_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat(
+ [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i16_ty, // cta_mask
+ llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty, // Flag for cta_mask
+ llvm_i1_ty, // Flag for cache_hint
+ llvm_i32_ty], // Flag for cta_group
+ [IntrConvergent,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
+ // Allowed values for cta_group are {0,1,2} i.e [0, 3).
+ Range<ArgIndex<12>, 0, 3>]>;
+
+def int_nvvm_cp_async_bulk_tensor_g2s_cta_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat(
+ [llvm_shared_ptr_ty, // dst_shared_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
+
+// TMA prefetch for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_prefetch_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
// Intrinsics for Prefetch and Prefetchu
let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>] in {
foreach level = ["L1", "L2"] in {
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 35c9cd6..b5f0cdf 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -90,7 +90,12 @@ let TargetPrefix = "spv" in {
def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
- def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+ def int_spv_refract
+ : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+ [llvm_anyfloat_ty, LLVMMatchType<0>,
+ llvm_anyfloat_ty],
+ [IntrNoMem]>;
+def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 2de26c0..af252aa 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -1255,6 +1255,13 @@ public:
bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); }
bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; }
+ /// Check if this is a valid generalized type metadata node.
+ bool hasGeneralizedMDString() {
+ if (getNumOperands() < 2 || !isa<MDString>(getOperand(1)))
+ return false;
+ return cast<MDString>(getOperand(1))->getString().ends_with(".generalized");
+ }
+
unsigned getNumTemporaryUses() const {
assert(isTemporary() && "Only for temporaries");
return Context.getReplaceableUses()->getNumUses();
@@ -1467,6 +1474,8 @@ public:
const Instruction *BInstr);
LLVM_ABI static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
LLVM_ABI static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
+ LLVM_ABI static MDNode *getMergedCalleeTypeMetadata(const MDNode *A,
+ const MDNode *B);
};
/// Tuple of metadata.
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 737610b..0fd5de3 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
- return false;
}
inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
@@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
}
llvm_unreachable(
"Checking invalid f2i/d2i intrinsic for signed int conversion");
- return false;
}
inline APFloat::roundingMode
@@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
return APFloat::rmTowardZero;
}
llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
- return APFloat::roundingMode::Invalid;
}
inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
@@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
@@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
@@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
- return false;
+}
+
+inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return true;
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
+}
+
+inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return true;
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
+}
+
+inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ return APFloat::rmTowardNegative;
+
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return APFloat::rmNearestTiesToEven;
+
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ return APFloat::rmTowardPositive;
+
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
+}
+
+inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) {
+ if (ShouldFTZ)
+ return DenormalMode::getPreserveSign();
+ return DenormalMode::getIEEE();
}
} // namespace nvvm
diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h
index ea3c1de..d813ae9 100644
--- a/llvm/include/llvm/IR/OptBisect.h
+++ b/llvm/include/llvm/IR/OptBisect.h
@@ -15,6 +15,7 @@
#define LLVM_IR_OPTBISECT_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Compiler.h"
#include <limits>
@@ -82,8 +83,38 @@ private:
mutable int LastBisectNum = 0;
};
-/// Singleton instance of the OptBisect class, so multiple pass managers don't
-/// need to coordinate their uses of OptBisect.
+/// This class implements a mechanism to disable passes and individual
+/// optimizations at compile time based on a command line option
+/// (-opt-disable) in order to study how single transformations, or
+/// combinations thereof, affect the IR.
+class LLVM_ABI OptDisable : public OptPassGate {
+public:
+ /// Checks the pass name to determine if the specified pass should run.
+ ///
+  /// It returns true if the pass should run, i.e. if its name was
+  /// not provided via the command line.
+ /// If -opt-disable-enable-verbosity is given, the method prints the
+ /// name of the pass, and whether or not the pass will be executed.
+ ///
+ /// Most passes should not call this routine directly. Instead, it is called
+ /// through helper routines provided by the base classes of the pass. For
+ /// instance, function passes should call FunctionPass::skipFunction().
+ bool shouldRunPass(StringRef PassName,
+ StringRef IRDescription) const override;
+
+ /// Parses the command line argument to extract the names of the passes
+ /// to be disabled. Multiple pass names can be provided with comma separation.
+ void setDisabled(StringRef Pass);
+
+ /// isEnabled() should return true before calling shouldRunPass().
+ bool isEnabled() const override { return !DisabledPasses.empty(); }
+
+private:
+ StringSet<> DisabledPasses = {};
+};
+
+/// Singleton instance of the OptPassGate class, so multiple pass managers don't
+/// need to coordinate their uses of OptBisect and OptDisable.
LLVM_ABI OptPassGate &getGlobalPassGate();
} // end namespace llvm
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index 4f44ae5..ea8226c 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -491,6 +491,22 @@ public:
/// invalidate them, unless they are preserved by the PreservedAnalyses set.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA);
+ /// Directly clear a cached analysis for an IR unit.
+ ///
+ /// Using invalidate() over this is preferred unless you are really
+ /// sure you want to *only* clear this analysis without asking if it is
+ /// invalid.
+ template <typename AnalysisT> void clearAnalysis(IRUnitT &IR) {
+ AnalysisResultListT &ResultsList = AnalysisResultLists[&IR];
+ AnalysisKey *ID = AnalysisT::ID();
+
+ auto I =
+ llvm::find_if(ResultsList, [&ID](auto &E) { return E.first == ID; });
+ assert(I != ResultsList.end() && "Analysis must be available");
+ ResultsList.erase(I);
+ AnalysisResults.erase({ID, &IR});
+ }
+
private:
/// Look up a registered analysis pass.
PassConceptT &lookUpPass(AnalysisKey *ID) {
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index 8058c8a..89ad4e5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -132,6 +132,10 @@ struct RuntimeLibcallsInfo {
return ImplToLibcall[Impl];
}
+  /// Check if this is a valid libcall for the current module; returns
+  /// RTLIB::Unsupported otherwise.
+ RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const;
+
private:
static const RTLIB::LibcallImpl
DefaultLibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1];
@@ -156,6 +160,14 @@ private:
/// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind.
LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls];
+ /// Check if a function name is a recognized runtime call of any kind. This
+ /// does not consider if this call is available for any current compilation,
+ /// just that it is a known call somewhere. This returns the set of all
+ /// LibcallImpls which match the name; multiple implementations with the same
+ /// name may exist but differ in interpretation based on the target context.
+ LLVM_ABI static iterator_range<ArrayRef<uint16_t>::const_iterator>
+ getRecognizedLibcallImpls(StringRef FuncName);
+
static bool darwinHasSinCosStret(const Triple &TT) {
if (!TT.isOSDarwin())
return false;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 11926d4..f0297cd 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -2129,7 +2129,7 @@ defvar X86CommonLibcalls =
);
defvar Windows32DivRemMulCalls =
- LibcallImpls<(add WindowsDivRemMulLibcalls),
+ LibcallsWithCC<(add WindowsDivRemMulLibcalls), X86_STDCALL,
RuntimeLibcallPredicate<"TT.isWindowsMSVCEnvironment() || TT.isWindowsItaniumEnvironment()">>;
def X86_32SystemLibrary
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 0322cbe..bfc1175 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -18,9 +18,7 @@
namespace llvm {
-class MCAlignFragment;
class MCFragment;
-class MCLEBFragment;
class MCSymbol;
class MCAssembler;
class MCContext;
@@ -60,6 +58,9 @@ protected: // Can only create subclasses.
MCAssembler *Asm = nullptr;
+ bool AllowAutoPadding = false;
+ bool AllowEnhancedRelaxation = false;
+
public:
MCAsmBackend(const MCAsmBackend &) = delete;
MCAsmBackend &operator=(const MCAsmBackend &) = delete;
@@ -73,11 +74,11 @@ public:
/// Return true if this target might automatically pad instructions and thus
/// need to emit padding enable/disable directives around sensative code.
- virtual bool allowAutoPadding() const { return false; }
+ bool allowAutoPadding() const { return AllowAutoPadding; }
/// Return true if this target allows an unrelaxable instruction to be
/// emitted into RelaxableFragment and then we can increase its size in a
/// tricky way for optimization.
- virtual bool allowEnhancedRelaxation() const { return false; }
+ bool allowEnhancedRelaxation() const { return AllowEnhancedRelaxation; }
/// lifetime management
virtual void reset() {}
@@ -105,21 +106,6 @@ public:
/// Get information on a fixup kind.
virtual MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const;
- /// Hook to check if extra nop bytes must be inserted for alignment directive.
- /// For some targets this may be necessary in order to support linker
- /// relaxation. The number of bytes to insert are returned in Size.
- virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
- unsigned &Size) {
- return false;
- }
-
- /// Hook which indicates if the target requires a fixup to be generated when
- /// handling an align directive in an executable section
- virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) {
- return false;
- }
-
// Evaluate a fixup, returning std::nullopt to use default handling for
// `Value` and `IsResolved`. Otherwise, returns `IsResolved` with the
// expectation that the hook updates `Value`.
@@ -177,6 +163,10 @@ public:
}
// Defined by linker relaxation targets.
+
+ // Return false to use default handling. Otherwise, set `Size` to the number
+ // of padding bytes.
+ virtual bool relaxAlign(MCFragment &F, unsigned &Size) { return false; }
virtual bool relaxDwarfLineAddr(MCFragment &, bool &WasRelaxed) const {
return false;
}
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index aa396ef..467ad4e 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -36,8 +36,6 @@ class MCCVDefRangeFragment;
class MCCVInlineLineTableFragment;
class MCFragment;
class MCFixup;
-class MCLEBFragment;
-class MCPseudoProbeAddrFragment;
class MCSymbolRefExpr;
class raw_ostream;
class MCAsmBackend;
@@ -69,6 +67,13 @@ private:
SmallVector<const MCSymbol *, 0> Symbols;
+ struct RelocDirective {
+ const MCExpr &Offset;
+ const MCExpr *Expr;
+ uint32_t Kind;
+ };
+ SmallVector<RelocDirective, 0> relocDirectives;
+
mutable SmallVector<std::pair<SMLoc, std::string>, 0> PendingErrors;
MCDwarfLineTableParams LTParams;
@@ -116,7 +121,6 @@ private:
bool relaxCVInlineLineTable(MCCVInlineLineTableFragment &DF);
bool relaxCVDefRange(MCCVDefRangeFragment &DF);
bool relaxFill(MCFillFragment &F);
- bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF);
public:
/// Construct a new assembler instance.
@@ -205,6 +209,7 @@ public:
LLVM_ABI bool registerSection(MCSection &Section);
LLVM_ABI bool registerSymbol(const MCSymbol &Symbol);
+ void addRelocDirective(RelocDirective RD);
LLVM_ABI void reportError(SMLoc L, const Twine &Msg) const;
// Record pending errors during layout iteration, as they may go away once the
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 3a7ca1a..cae2fbc 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -136,6 +136,18 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const = 0;
+ /// Returns the disassembly of an instruction bundle for VLIW architectures
+ /// like Hexagon.
+ ///
+ /// \param Instr - An MCInst to populate with the contents of
+ /// the Bundle with sub-instructions encoded as Inst operands.
+ virtual DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const {
+ return Fail;
+ }
+
/// Used to perform separate target specific disassembly for a particular
/// symbol. May parse any prelude that precedes instructions after the
/// start of a symbol, or the entire symbol.
diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h
index ad0961c..144f6bc 100644
--- a/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/llvm/include/llvm/MC/MCELFStreamer.h
@@ -141,7 +141,8 @@ public:
}
private:
- void finalizeCGProfileEntry(const MCSymbolRefExpr *&S, uint64_t Offset);
+ void finalizeCGProfileEntry(const MCSymbolRefExpr *Sym, uint64_t Offset,
+ const MCSymbolRefExpr *&S);
void finalizeCGProfile();
bool SeenIdent = false;
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index e2a77b8..2ceeba2 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -40,14 +40,6 @@ class MCObjectStreamer : public MCStreamer {
std::unique_ptr<MCAssembler> Assembler;
bool EmitEHFrame;
bool EmitDebugFrame;
- struct PendingMCFixup {
- const MCSymbol *Sym;
- MCFixup Fixup;
- MCFragment *DF;
- PendingMCFixup(const MCSymbol *McSym, MCFragment *F, MCFixup McFixup)
- : Sym(McSym), Fixup(McFixup), DF(F) {}
- };
- SmallVector<PendingMCFixup, 2> PendingFixups;
struct PendingAssignment {
MCSymbol *Symbol;
@@ -62,8 +54,6 @@ class MCObjectStreamer : public MCStreamer {
void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &);
void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
- void emitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
- void resolvePendingFixups();
protected:
MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
@@ -82,24 +72,6 @@ public:
MCSymbol *emitCFILabel() override;
void emitCFISections(bool EH, bool Debug) override;
- void insert(MCFragment *F) {
- auto *Sec = CurFrag->getParent();
- F->setParent(Sec);
- F->setLayoutOrder(CurFrag->getLayoutOrder() + 1);
- CurFrag->Next = F;
- CurFrag = F;
- Sec->curFragList()->Tail = F;
- }
-
- /// Get a data fragment to write into, creating a new one if the current
- /// fragment is not FT_Data.
- /// Optionally a \p STI can be passed in so that a new fragment is created
- /// if the Subtarget differs from the current fragment.
- MCFragment *getOrCreateDataFragment(const MCSubtargetInfo *STI = nullptr);
-
-protected:
- bool changeSectionImpl(MCSection *Section, uint32_t Subsection);
-
public:
void visitUsedSymbol(const MCSymbol &Sym) override;
@@ -108,6 +80,15 @@ public:
/// \name MCStreamer Interface
/// @{
+ // Add a fragment with a variable-size tail and start a new empty fragment.
+ void insert(MCFragment *F);
+
+ // Add a new fragment to the current section without a variable-size tail.
+ void newFragment();
+
+ void appendContents(size_t Num, char Elt);
+ void addFixup(const MCExpr *Value, MCFixupKind Kind);
+
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
virtual void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment &F,
uint64_t Offset);
@@ -162,9 +143,8 @@ public:
void emitCVStringTableDirective() override;
void emitCVFileChecksumsDirective() override;
void emitCVFileChecksumOffsetDirective(unsigned FileNo) override;
- std::optional<std::pair<bool, std::string>>
- emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr,
- SMLoc Loc, const MCSubtargetInfo &STI) override;
+ void emitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc Loc = {}) override;
using MCStreamer::emitFill;
void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
SMLoc Loc = SMLoc()) override;
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 296fdd8..87a8349 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -91,8 +91,7 @@ private:
bool IsRegistered : 1;
bool IsText : 1;
-
- bool IsVirtual : 1;
+ bool IsBss : 1;
/// Whether the section contains linker-relaxable fragments. If true, the
/// offset between two locations may not be fully resolved.
@@ -113,7 +112,7 @@ protected:
StringRef Name;
SectionVariant Variant;
- MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual,
+ MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss,
MCSymbol *Begin);
// Protected non-virtual dtor prevents destroy through a base class pointer.
~MCSection() {}
@@ -178,9 +177,7 @@ public:
/// Check whether this section is "virtual", that is has no actual object
/// file contents.
- bool isVirtualSection() const { return IsVirtual; }
-
- virtual StringRef getVirtualSectionKind() const;
+ bool isBssSection() const { return IsBss; }
};
// Represents a contiguous piece of code or data within a section. Its size is
@@ -188,6 +185,7 @@ public:
// destructors.
class MCFragment {
friend class MCAssembler;
+ friend class MCStreamer;
friend class MCObjectStreamer;
friend class MCSection;
@@ -206,7 +204,6 @@ public:
FT_SymbolId,
FT_CVInlineLines,
FT_CVDefRange,
- FT_PseudoProbe,
};
private:
@@ -234,11 +231,16 @@ protected:
/// FT_Relaxable, x86-specific
bool AllowAutoPadding : 1;
+ // Track content and fixups for the fixed-size part as fragments are
+ // appended to the section. The content remains immutable, except when
+ // modified by applyFixup.
uint32_t ContentStart = 0;
uint32_t ContentEnd = 0;
uint32_t FixupStart = 0;
uint32_t FixupEnd = 0;
+ // Track content and fixups for the optional variable-size tail part,
+ // typically modified during relaxation.
uint32_t VarContentStart = 0;
uint32_t VarContentEnd = 0;
uint32_t VarFixupStart = 0;
@@ -255,6 +257,19 @@ protected:
uint32_t OperandSize;
} relax;
struct {
+ // The alignment to ensure, in bytes.
+ Align Alignment;
+ // The size of the integer (in bytes) of \p Value.
+ uint8_t FillLen;
+ // If true, fill with target-specific nop instructions.
+ bool EmitNops;
+ // The maximum number of bytes to emit; if the alignment
+ // cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit;
+ // Value to use for filling padding bytes.
+ int64_t Fill;
+ } align;
+ struct {
// True if this is a sleb128, false if uleb128.
bool IsSigned;
// The value this fragment should contain.
@@ -283,10 +298,10 @@ public:
return false;
case MCFragment::FT_Relaxable:
case MCFragment::FT_Data:
+ case MCFragment::FT_Align:
case MCFragment::FT_Dwarf:
case MCFragment::FT_DwarfFrame:
case MCFragment::FT_LEB:
- case MCFragment::FT_PseudoProbe:
case MCFragment::FT_CVInlineLines:
case MCFragment::FT_CVDefRange:
return true;
@@ -328,9 +343,9 @@ public:
bool getAllowAutoPadding() const { return AllowAutoPadding; }
void setAllowAutoPadding(bool V) { AllowAutoPadding = V; }
- // Content-related functions manage parent's storage using ContentStart and
+ //== Content-related functions manage parent's storage using ContentStart and
// ContentSize.
- void clearContents() { ContentEnd = ContentStart; }
+
// Get a SmallVector reference. The caller should call doneAppending to update
// `ContentEnd`.
SmallVectorImpl<char> &getContentsForAppending() {
@@ -354,7 +369,6 @@ public:
getContentsForAppending().append(Num, Elt);
doneAppending();
}
- LLVM_ABI void setContents(ArrayRef<char> Contents);
MutableArrayRef<char> getContents() {
return MutableArrayRef(getParent()->ContentStorage)
.slice(ContentStart, ContentEnd - ContentStart);
@@ -386,7 +400,6 @@ public:
void clearFixups() { FixupEnd = FixupStart; }
LLVM_ABI void addFixup(MCFixup Fixup);
LLVM_ABI void appendFixups(ArrayRef<MCFixup> Fixups);
- LLVM_ABI void setFixups(ArrayRef<MCFixup> Fixups);
MutableArrayRef<MCFixup> getFixups() {
return MutableArrayRef(getParent()->FixupStorage)
.slice(FixupStart, FixupEnd - FixupStart);
@@ -441,7 +454,45 @@ public:
llvm::copy(Inst, S.begin() + u.relax.OperandStart);
}
+ //== FT_Align functions
+ void makeAlign(Align Alignment, int64_t Fill, uint8_t FillLen,
+ unsigned MaxBytesToEmit) {
+ Kind = FT_Align;
+ u.align.EmitNops = false;
+ u.align.Alignment = Alignment;
+ u.align.Fill = Fill;
+ u.align.FillLen = FillLen;
+ u.align.MaxBytesToEmit = MaxBytesToEmit;
+ }
+
+ Align getAlignment() const {
+ assert(Kind == FT_Align);
+ return u.align.Alignment;
+ }
+ int64_t getAlignFill() const {
+ assert(Kind == FT_Align);
+ return u.align.Fill;
+ }
+ uint8_t getAlignFillLen() const {
+ assert(Kind == FT_Align);
+ return u.align.FillLen;
+ }
+ unsigned getAlignMaxBytesToEmit() const {
+ assert(Kind == FT_Align);
+ return u.align.MaxBytesToEmit;
+ }
+ bool hasAlignEmitNops() const {
+ assert(Kind == FT_Align);
+ return u.align.EmitNops;
+ }
+
//== FT_LEB functions
+ void makeLEB(bool IsSigned, const MCExpr *Value) {
+ assert(Kind == FT_Data);
+ Kind = MCFragment::FT_LEB;
+ u.leb.IsSigned = IsSigned;
+ u.leb.Value = Value;
+ }
const MCExpr &getLEBValue() const {
assert(Kind == FT_LEB);
return *u.leb.Value;
@@ -454,10 +505,6 @@ public:
assert(Kind == FT_LEB);
return u.leb.IsSigned;
}
- void setLEBSigned(bool S) {
- assert(Kind == FT_LEB);
- u.leb.IsSigned = S;
- }
//== FT_DwarfFrame functions
const MCExpr &getDwarfAddrDelta() const {
@@ -486,52 +533,6 @@ protected:
: MCFragment(FType, HasInstructions) {}
};
-class MCAlignFragment : public MCFragment {
- /// Flag to indicate that (optimal) NOPs should be emitted instead
- /// of using the provided value. The exact interpretation of this flag is
- /// target dependent.
- bool EmitNops : 1;
-
- /// The alignment to ensure, in bytes.
- Align Alignment;
-
- /// The size of the integer (in bytes) of \p Value.
- uint8_t FillLen;
-
- /// The maximum number of bytes to emit; if the alignment
- /// cannot be satisfied in this width then this fragment is ignored.
- unsigned MaxBytesToEmit;
-
- /// Value to use for filling padding bytes.
- int64_t Fill;
-
- /// When emitting Nops some subtargets have specific nop encodings.
- const MCSubtargetInfo *STI = nullptr;
-
-public:
- MCAlignFragment(Align Alignment, int64_t Fill, uint8_t FillLen,
- unsigned MaxBytesToEmit)
- : MCFragment(FT_Align, false), EmitNops(false), Alignment(Alignment),
- FillLen(FillLen), MaxBytesToEmit(MaxBytesToEmit), Fill(Fill) {}
-
- Align getAlignment() const { return Alignment; }
- int64_t getFill() const { return Fill; }
- uint8_t getFillLen() const { return FillLen; }
- unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
-
- bool hasEmitNops() const { return EmitNops; }
- void setEmitNops(bool Value, const MCSubtargetInfo *STI) {
- EmitNops = Value;
- this->STI = STI;
- }
-
- const MCSubtargetInfo *getSubtargetInfo() const { return STI; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Align;
- }
-};
-
class MCFillFragment : public MCFragment {
uint8_t ValueSize;
/// Value to use for filling bytes.
@@ -729,22 +730,6 @@ public:
}
};
-class MCPseudoProbeAddrFragment : public MCEncodedFragment {
- /// The expression for the difference of the two symbols that
- /// make up the address delta between two .pseudoprobe directives.
- const MCExpr *AddrDelta;
-
-public:
- MCPseudoProbeAddrFragment(const MCExpr *AddrDelta)
- : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {}
-
- const MCExpr &getAddrDelta() const { return *AddrDelta; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_PseudoProbe;
- }
-};
-
inline MCSection::iterator &MCSection::iterator::operator++() {
F = F->Next;
return *this;
diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h
index 4472a12..f979413a 100644
--- a/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -82,7 +82,6 @@ public:
raw_ostream &OS,
uint32_t Subsection) const override;
bool useCodeAlign() const override;
- StringRef getVirtualSectionKind() const override;
unsigned getOrAssignWinCFISectionID(unsigned *NextID) const {
if (WinCFISectionID == ~0U)
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index f09d305..64a4daf 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -68,10 +68,6 @@ private:
Group.getPointer()->setIsSignature();
}
- // TODO Delete after we stop supporting generation of GNU-style .zdebug_*
- // sections.
- void setSectionName(StringRef Name) { this->Name = Name; }
-
public:
/// Decides whether a '.section' directive should be printed before the
/// section name
@@ -88,7 +84,6 @@ public:
raw_ostream &OS,
uint32_t Subsection) const override;
bool useCodeAlign() const override;
- StringRef getVirtualSectionKind() const override;
bool isUnique() const { return UniqueID != NonUniqueID; }
unsigned getUniqueID() const { return UniqueID; }
diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h
index 9e3f95e..b166397 100644
--- a/llvm/include/llvm/MC/MCSectionGOFF.h
+++ b/llvm/include/llvm/MC/MCSectionGOFF.h
@@ -111,7 +111,7 @@ public:
// Returns the text style for a section. Only defined for ED and PR sections.
GOFF::ESDTextStyle getTextStyle() const {
- assert((isED() || isPR() || isVirtualSection()) && "Expect ED or PR section");
+ assert((isED() || isPR() || isBssSection()) && "Expect ED or PR section");
if (isED())
return EDAttributes.TextStyle;
if (isPR())
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 1f7c8b5..4bfc8f9 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -259,6 +259,8 @@ class LLVM_ABI MCStreamer {
bool AllowAutoPadding = false;
protected:
+ bool IsObj = false;
+
// Symbol of the current epilog for which we are processing SEH directives.
WinEH::FrameInfo::Epilog *CurrentWinEpilog = nullptr;
@@ -270,6 +272,8 @@ protected:
/// section changes.
virtual void changeSection(MCSection *, uint32_t);
+ void addFragment(MCFragment *F);
+
virtual void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
virtual void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
@@ -308,6 +312,7 @@ public:
virtual void reset();
MCContext &getContext() const { return Context; }
+ bool isObj() const { return IsObj; }
// MCObjectStreamer has an MCAssembler and allows more expression folding at
// parse time.
@@ -425,11 +430,15 @@ public:
}
MCFragment *getCurrentFragment() const {
+ // Ensure consistency with the section stack.
assert(!getCurrentSection().first ||
CurFrag->getParent() == getCurrentSection().first);
+ // Ensure we eagerly allocate an empty fragment after adding fragment with a
+ // variable-size tail.
+ assert(!CurFrag || CurFrag->getKind() == MCFragment::FT_Data);
return CurFrag;
}
-
+ size_t getCurFragOffset() const { return getCurrentFragment()->Offset; }
/// Save the current and previous section on the section stack.
void pushSection() {
SectionStack.push_back(
@@ -1048,13 +1057,9 @@ public:
virtual void emitSyntaxDirective();
- /// Record a relocation described by the .reloc directive. Return std::nullopt
- /// if succeeded. Otherwise, return a pair (Name is invalid, error message).
- virtual std::optional<std::pair<bool, std::string>>
- emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr,
- SMLoc Loc, const MCSubtargetInfo &STI) {
- return std::nullopt;
- }
+ /// Record a relocation described by the .reloc directive.
+ virtual void emitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc Loc = {}) {}
virtual void emitAddrsig() {}
virtual void emitAddrsigSym(const MCSymbol *Sym) {}
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 1036868..a3aa0d9 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1312,7 +1312,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
case ELF::EM_PPC:
return (IsLittleEndian ? "elf32-powerpcle" : "elf32-powerpc");
case ELF::EM_RISCV:
- return "elf32-littleriscv";
+ return (IsLittleEndian ? "elf32-littleriscv" : "elf32-bigriscv");
case ELF::EM_CSKY:
return "elf32-csky";
case ELF::EM_SPARC:
@@ -1338,7 +1338,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
case ELF::EM_PPC64:
return (IsLittleEndian ? "elf64-powerpcle" : "elf64-powerpc");
case ELF::EM_RISCV:
- return "elf64-littleriscv";
+ return (IsLittleEndian ? "elf64-littleriscv" : "elf64-bigriscv");
case ELF::EM_S390:
return "elf64-s390";
case ELF::EM_SPARCV9:
@@ -1400,9 +1400,9 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
case ELF::EM_RISCV:
switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
case ELF::ELFCLASS32:
- return Triple::riscv32;
+ return IsLittleEndian ? Triple::riscv32 : Triple::riscv32be;
case ELF::ELFCLASS64:
- return Triple::riscv64;
+ return IsLittleEndian ? Triple::riscv64 : Triple::riscv64be;
default:
report_fatal_error("Invalid ELFCLASS!");
}
diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h
new file mode 100644
index 0000000..cf4fe20
--- /dev/null
+++ b/llvm/include/llvm/Object/SFrameParser.h
@@ -0,0 +1,48 @@
+//===- SFrameParser.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_SFRAME_H
+#define LLVM_OBJECT_SFRAME_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/SFrame.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+
+namespace llvm {
+namespace object {
+
+template <endianness E> class SFrameParser {
+public:
+ static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents);
+
+ const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; }
+ const sframe::Header<E> &getHeader() const { return Header; }
+
+ bool usesFixedRAOffset() const {
+ return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle;
+ }
+ bool usesFixedFPOffset() const {
+ return false; // Not used in any currently defined ABI.
+ }
+
+private:
+ ArrayRef<uint8_t> Data;
+ const sframe::Header<E> &Header;
+
+ SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header)
+ : Data(Data), Header(Header) {}
+};
+
+extern template class SFrameParser<endianness::big>;
+extern template class SFrameParser<endianness::little>;
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_SFRAME_H
diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h
index 2ecd47d..f3962c3 100644
--- a/llvm/include/llvm/Pass.h
+++ b/llvm/include/llvm/Pass.h
@@ -114,6 +114,10 @@ public:
/// Registration templates, but can be overloaded directly.
virtual StringRef getPassName() const;
+ /// Return a nice clean name for a pass
+ /// corresponding to that used to enable the pass in opt.
+ StringRef getPassArgument() const;
+
/// getPassID - Return the PassID number that corresponds to this pass.
AnalysisID getPassID() const {
return PassID;
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a8176eb..b0360f1 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -281,7 +281,7 @@ protected:
FunctionPassManager FPM;
FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
- FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
+ FPM.addPass(FreeMachineFunctionPass());
if (this->PB.AddInCGSCCOrder) {
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 66051d7..fc81ab7 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1132,14 +1132,14 @@ def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
def udiv_by_const : GICombineRule<
(defs root:$root),
(match (G_UDIV $dst, $x, $y):$root,
- [{ return Helper.matchUDivorURemByConst(*${root}); }]),
- (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
+ [{ return Helper.matchUDivOrURemByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>;
def sdiv_by_const : GICombineRule<
(defs root:$root),
(match (G_SDIV $dst, $x, $y):$root,
- [{ return Helper.matchSDivByConst(*${root}); }]),
- (apply [{ Helper.applySDivByConst(*${root}); }])>;
+ [{ return Helper.matchSDivOrSRemByConst(*${root}); }]),
+ (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>;
def sdiv_by_pow2 : GICombineRule<
(defs root:$root),
@@ -1159,10 +1159,16 @@ def intdiv_combines : GICombineGroup<[udiv_by_pow2, sdiv_by_pow2,
def urem_by_const : GICombineRule<
(defs root:$root),
(match (G_UREM $dst, $x, $y):$root,
- [{ return Helper.matchUDivorURemByConst(*${root}); }]),
- (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
+ [{ return Helper.matchUDivOrURemByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>;
-def intrem_combines : GICombineGroup<[urem_by_const]>;
+def srem_by_const : GICombineRule<
+ (defs root:$root),
+ (match (G_SREM $dst, $x, $y):$root,
+ [{ return Helper.matchSDivOrSRemByConst(*${root}); }]),
+ (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>;
+
+def intrem_combines : GICombineGroup<[urem_by_const, srem_by_const]>;
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 7577792..b65a63b 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -78,6 +78,8 @@ def : GINodeEquiv<G_XOR, xor>;
def : GINodeEquiv<G_SHL, shl>;
def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
+def : GINodeEquiv<G_ABDS, abds>;
+def : GINodeEquiv<G_ABDU, abdu>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h
index be3d41e..40a9b6c 100644
--- a/llvm/include/llvm/TargetParser/Host.h
+++ b/llvm/include/llvm/TargetParser/Host.h
@@ -53,7 +53,7 @@ LLVM_ABI StringRef getHostCPUName();
/// which features may appear in this map, except that they are all valid LLVM
/// feature names. The map can be empty, for example if feature detection
/// fails.
-LLVM_ABI const StringMap<bool, MallocAllocator> getHostCPUFeatures();
+LLVM_ABI StringMap<bool, MallocAllocator> getHostCPUFeatures();
/// This is a function compatible with cl::AddExtraVersionPrinter, which adds
/// info about the current target triple and detected CPU.
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 57d771b..670a632 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -49,62 +49,64 @@ public:
enum ArchType {
UnknownArch,
- arm, // ARM (little endian): arm, armv.*, xscale
- armeb, // ARM (big endian): armeb
- aarch64, // AArch64 (little endian): aarch64
- aarch64_be, // AArch64 (big endian): aarch64_be
- aarch64_32, // AArch64 (little endian) ILP32: aarch64_32
- arc, // ARC: Synopsys ARC
- avr, // AVR: Atmel AVR microcontroller
- bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
- bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
- csky, // CSKY: csky
- dxil, // DXIL 32-bit DirectX bytecode
- hexagon, // Hexagon: hexagon
- loongarch32, // LoongArch (32-bit): loongarch32
- loongarch64, // LoongArch (64-bit): loongarch64
- m68k, // M68k: Motorola 680x0 family
- mips, // MIPS: mips, mipsallegrex, mipsr6
- mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el
- mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6
- mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
- msp430, // MSP430: msp430
- ppc, // PPC: powerpc
- ppcle, // PPCLE: powerpc (little endian)
- ppc64, // PPC64: powerpc64, ppu
- ppc64le, // PPC64LE: powerpc64le
- r600, // R600: AMD GPUs HD2XXX - HD6XXX
- amdgcn, // AMDGCN: AMD GCN GPUs
- riscv32, // RISC-V (32-bit): riscv32
- riscv64, // RISC-V (64-bit): riscv64
- sparc, // Sparc: sparc
- sparcv9, // Sparcv9: Sparcv9
- sparcel, // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant
- systemz, // SystemZ: s390x
- tce, // TCE (http://tce.cs.tut.fi/): tce
- tcele, // TCE little endian (http://tce.cs.tut.fi/): tcele
- thumb, // Thumb (little endian): thumb, thumbv.*
- thumbeb, // Thumb (big endian): thumbeb
- x86, // X86: i[3-9]86
- x86_64, // X86-64: amd64, x86_64
- xcore, // XCore: xcore
- xtensa, // Tensilica: Xtensa
- nvptx, // NVPTX: 32-bit
- nvptx64, // NVPTX: 64-bit
- amdil, // AMDIL
- amdil64, // AMDIL with 64-bit pointers
- hsail, // AMD HSAIL
- hsail64, // AMD HSAIL with 64-bit pointers
- spir, // SPIR: standard portable IR for OpenCL 32-bit version
- spir64, // SPIR: standard portable IR for OpenCL 64-bit version
- spirv, // SPIR-V with logical memory layout.
- spirv32, // SPIR-V with 32-bit pointers
- spirv64, // SPIR-V with 64-bit pointers
- kalimba, // Kalimba: generic kalimba
- shave, // SHAVE: Movidius vector VLIW processors
- lanai, // Lanai: Lanai 32-bit
- wasm32, // WebAssembly with 32-bit pointers
- wasm64, // WebAssembly with 64-bit pointers
+ arm, // ARM (little endian): arm, armv.*, xscale
+ armeb, // ARM (big endian): armeb
+ aarch64, // AArch64 (little endian): aarch64
+ aarch64_be, // AArch64 (big endian): aarch64_be
+ aarch64_32, // AArch64 (little endian) ILP32: aarch64_32
+ arc, // ARC: Synopsys ARC
+ avr, // AVR: Atmel AVR microcontroller
+ bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
+ bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
+ csky, // CSKY: csky
+ dxil, // DXIL 32-bit DirectX bytecode
+ hexagon, // Hexagon: hexagon
+ loongarch32, // LoongArch (32-bit): loongarch32
+ loongarch64, // LoongArch (64-bit): loongarch64
+ m68k, // M68k: Motorola 680x0 family
+ mips, // MIPS: mips, mipsallegrex, mipsr6
+ mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el
+ mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6
+ mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
+ msp430, // MSP430: msp430
+ ppc, // PPC: powerpc
+ ppcle, // PPCLE: powerpc (little endian)
+ ppc64, // PPC64: powerpc64, ppu
+ ppc64le, // PPC64LE: powerpc64le
+ r600, // R600: AMD GPUs HD2XXX - HD6XXX
+ amdgcn, // AMDGCN: AMD GCN GPUs
+ riscv32, // RISC-V (32-bit, little endian): riscv32
+ riscv64, // RISC-V (64-bit, little endian): riscv64
+ riscv32be, // RISC-V (32-bit, big endian): riscv32be
+ riscv64be, // RISC-V (64-bit, big endian): riscv64be
+ sparc, // Sparc: sparc
+ sparcv9, // Sparcv9: Sparcv9
+ sparcel, // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant
+ systemz, // SystemZ: s390x
+ tce, // TCE (http://tce.cs.tut.fi/): tce
+ tcele, // TCE little endian (http://tce.cs.tut.fi/): tcele
+ thumb, // Thumb (little endian): thumb, thumbv.*
+ thumbeb, // Thumb (big endian): thumbeb
+ x86, // X86: i[3-9]86
+ x86_64, // X86-64: amd64, x86_64
+ xcore, // XCore: xcore
+ xtensa, // Tensilica: Xtensa
+ nvptx, // NVPTX: 32-bit
+ nvptx64, // NVPTX: 64-bit
+ amdil, // AMDIL
+ amdil64, // AMDIL with 64-bit pointers
+ hsail, // AMD HSAIL
+ hsail64, // AMD HSAIL with 64-bit pointers
+ spir, // SPIR: standard portable IR for OpenCL 32-bit version
+ spir64, // SPIR: standard portable IR for OpenCL 64-bit version
+ spirv, // SPIR-V with logical memory layout.
+ spirv32, // SPIR-V with 32-bit pointers
+ spirv64, // SPIR-V with 64-bit pointers
+ kalimba, // Kalimba: generic kalimba
+ shave, // SHAVE: Movidius vector VLIW processors
+ lanai, // Lanai: Lanai 32-bit
+ wasm32, // WebAssembly with 32-bit pointers
+ wasm64, // WebAssembly with 64-bit pointers
renderscript32, // 32-bit RenderScript
renderscript64, // 64-bit RenderScript
ve, // NEC SX-Aurora Vector Engine
@@ -1064,10 +1066,14 @@ public:
}
/// Tests whether the target is 32-bit RISC-V.
- bool isRISCV32() const { return getArch() == Triple::riscv32; }
+ bool isRISCV32() const {
+ return getArch() == Triple::riscv32 || getArch() == Triple::riscv32be;
+ }
/// Tests whether the target is 64-bit RISC-V.
- bool isRISCV64() const { return getArch() == Triple::riscv64; }
+ bool isRISCV64() const {
+ return getArch() == Triple::riscv64 || getArch() == Triple::riscv64be;
+ }
/// Tests whether the target is RISC-V (32- and 64-bit).
bool isRISCV() const { return isRISCV32() || isRISCV64(); }
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index bb79d25..3f5f427 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -325,7 +325,6 @@ LLVM_ABI void salvageDebugInfo(Instruction &I);
/// Mark undef if salvaging cannot be completed.
LLVM_ABI void
salvageDebugInfoForDbgValues(Instruction &I,
- ArrayRef<DbgVariableIntrinsic *> Insns,
ArrayRef<DbgVariableRecord *> DPInsns);
/// Given an instruction \p I and DIExpression \p DIExpr operating on
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 4e5da81..9500b1f 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -29,7 +29,6 @@ template <typename T> class SSAUpdaterTraits;
class Type;
class Use;
class Value;
-class DbgValueInst;
/// Helper class for SSA formation on a set of values defined in
/// multiple blocks.
@@ -123,8 +122,6 @@ public:
/// value set to the new SSA value if available, and undef if not.
void UpdateDebugValues(Instruction *I);
void UpdateDebugValues(Instruction *I,
- SmallVectorImpl<DbgValueInst *> &DbgValues);
- void UpdateDebugValues(Instruction *I,
SmallVectorImpl<DbgVariableRecord *> &DbgValues);
/// Rewrite a use like \c RewriteUse but handling in-block definitions.
@@ -136,7 +133,6 @@ public:
private:
Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
- void UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue);
void UpdateDebugValue(Instruction *I, DbgVariableRecord *DbgValue);
};
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index a101151..39fef92 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -530,6 +530,7 @@ private:
bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+ Value *tryToReuseLCSSAPhi(const SCEVAddRecExpr *S);
Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
const Loop *L, Type *&TruncTy,