Diffstat (limited to 'llvm/include/llvm')
-rw-r--r--  llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/MemoryProfileInfo.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 4
-rw-r--r--  llvm/include/llvm/AsmParser/LLToken.h | 1
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h | 21
-rw-r--r--  llvm/include/llvm/BinaryFormat/SFrame.h | 28
-rw-r--r--  llvm/include/llvm/BinaryFormat/SFrameConstants.def | 39
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h | 14
-rw-r--r--  llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/MachineInstrBundle.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/MachineScheduler.h | 18
-rw-r--r--  llvm/include/llvm/CodeGen/Passes.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAG.h | 15
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 16
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 20
-rw-r--r--  llvm/include/llvm/CodeGen/TargetSubtargetInfo.h | 5
-rw-r--r--  llvm/include/llvm/ExecutionEngine/MCJIT.h | 14
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 9
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h | 5
-rw-r--r--  llvm/include/llvm/IR/CallingConv.h | 8
-rw-r--r--  llvm/include/llvm/IR/DebugInfo.h | 29
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 15
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsNVVM.td | 143
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 4
-rw-r--r--  llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 83
-rw-r--r--  llvm/include/llvm/IR/PassInstrumentation.h | 2
-rw-r--r--  llvm/include/llvm/IR/PatternMatch.h | 40
-rw-r--r--  llvm/include/llvm/InitializePasses.h | 1
-rw-r--r--  llvm/include/llvm/LinkAllIR.h | 8
-rw-r--r--  llvm/include/llvm/LinkAllPasses.h | 15
-rw-r--r--  llvm/include/llvm/MC/MCAsmBackend.h | 28
-rw-r--r--  llvm/include/llvm/MC/MCAssembler.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCObjectStreamer.h | 17
-rw-r--r--  llvm/include/llvm/MC/MCSection.h | 140
-rw-r--r--  llvm/include/llvm/MC/MCSectionCOFF.h | 1
-rw-r--r--  llvm/include/llvm/MC/MCSectionELF.h | 5
-rw-r--r--  llvm/include/llvm/MC/MCSectionGOFF.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCStreamer.h | 13
-rw-r--r--  llvm/include/llvm/Object/ELFObjectFile.h | 1
-rw-r--r--  llvm/include/llvm/Object/SFrameParser.h | 48
-rw-r--r--  llvm/include/llvm/Passes/MachinePassRegistry.def | 1
-rw-r--r--  llvm/include/llvm/Support/AlwaysTrue.h | 25
-rw-r--r--  llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 2
-rw-r--r--  llvm/include/llvm/TargetParser/AArch64TargetParser.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Local.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h | 1
48 files changed, 645 insertions(+), 257 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 73bfe1a..af6e534 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -236,8 +236,8 @@ public:
/// In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() const {
- return FoundNonConstantDistanceDependence &&
+ bool shouldRetryWithRuntimeChecks() const {
+ return ShouldRetryWithRuntimeChecks &&
Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
}
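A hedged usage sketch of the renamed predicate, assuming a MemoryDepChecker-style object named `DepChecker` at the call site:

```cpp
// Sketch only: fall back to runtime pointer checks when the dependence
// analysis could not prove safety but opted into retrying.
if (!DepChecker.isSafeForVectorization() &&
    DepChecker.shouldRetryWithRuntimeChecks()) {
  // Re-run the legality analysis, this time emitting runtime checks.
}
```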
@@ -327,9 +327,9 @@ private:
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();
- /// If we see a non-constant dependence distance we can still try to
- /// vectorize this loop with runtime checks.
- bool FoundNonConstantDistanceDependence = false;
+ /// Whether we should try to vectorize the loop with runtime checks, if the
+ /// dependencies are not safe.
+ bool ShouldRetryWithRuntimeChecks = false;
/// Result of the dependence checks, indicating whether the checked
/// dependences are safe for vectorization, require RT checks or are known to
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index b042a71..571caf9 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -102,6 +102,12 @@ private:
// The maximum size of a cold allocation context, from the profile summary.
uint64_t MaxColdSize;
+ // Tracks whether we have built the Trie from existing MD_memprof metadata. We
+ // apply different heuristics for determining whether to discard non-cold
+ // contexts when rebuilding as we have lost information available during the
+ // original profile match.
+ bool BuiltFromExistingMetadata = false;
+
void deleteTrieNode(CallStackTrieNode *Node) {
if (!Node)
return;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 98b793a..7928835 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1930,7 +1930,7 @@ public:
/// Returns a bitmask constructed from the target-features or fmv-features
/// metadata of a function.
- LLVM_ABI uint64_t getFeatureMask(const Function &F) const;
+ LLVM_ABI APInt getFeatureMask(const Function &F) const;
/// Returns true if this is an instance of a function with multiple versions.
LLVM_ABI bool isMultiversionedFunction(const Function &F) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ddc8a5e..2ea87b3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1126,7 +1126,9 @@ public:
virtual bool hasArmWideBranch(bool) const { return false; }
- virtual uint64_t getFeatureMask(const Function &F) const { return 0; }
+ virtual APInt getFeatureMask(const Function &F) const {
+ return APInt::getZero(32);
+ }
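Widening the return type from `uint64_t` to `APInt` lets targets express more than 64 FMV features. A hedged sketch of a target override under the new signature; the target name, 128-bit width, and bit position are illustrative:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Function.h"

// Illustrative override; MyTTIImpl is a hypothetical target implementation.
llvm::APInt MyTTIImpl::getFeatureMask(const llvm::Function &F) const {
  llvm::APInt Mask = llvm::APInt::getZero(128);
  if (F.hasFnAttribute("fmv-features"))
    Mask.setBit(70); // a feature bit beyond the old 64-bit limit
  return Mask;
}
```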
virtual bool isMultiversionedFunction(const Function &F) const {
return false;
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index c7e4bdf..a2311d2 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -181,6 +181,7 @@ enum Kind {
kw_amdgpu_cs_chain_preserve,
kw_amdgpu_kernel,
kw_amdgpu_gfx,
+ kw_amdgpu_gfx_whole_wave,
kw_tailcc,
kw_m68k_rtdcc,
kw_graalcc,
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index e4f82ad..ad35d7f 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -362,6 +362,7 @@ enum {
ELFOSABI_FENIXOS = 16, // FenixOS
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
+ ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
+// CUDA OS ABI Version identification.
+enum {
+ ELFABIVERSION_CUDA_V1 = 7,
+ ELFABIVERSION_CUDA_V2 = 8,
+};
+
#define ELF_RELOC(name, value) name = value,
// X86_64 relocations.
@@ -921,7 +928,7 @@ enum {
// NVPTX specific e_flags.
enum : unsigned {
- // Processor selection mask for EF_CUDA_SM* values.
+ // Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
EF_CUDA_SM = 0xff,
// SM based processor values.
@@ -954,12 +961,22 @@ enum : unsigned {
// The target is using 64-bit addressing.
EF_CUDA_64BIT_ADDRESS = 0x400,
// Set when using the sm_90a processor.
- EF_CUDA_ACCELERATORS = 0x800,
+ EF_CUDA_ACCELERATORS_V1 = 0x800,
// Undocumented software feature.
EF_CUDA_SW_FLAG_V2 = 0x1000,
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,
+
+ // Processor selection mask for EF_CUDA_SM* values following Blackwell.
+ EF_CUDA_SM_MASK = 0xff00,
+
+ // SM based processor values.
+ EF_CUDA_SM100 = 0x6400,
+ EF_CUDA_SM120 = 0x7800,
+
+ // Set when using an accelerator variant like sm_100a.
+ EF_CUDA_ACCELERATORS = 0x8,
};
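A hedged sketch of decoding the SM value under both layouts; the shift width follows from the mask definitions above:

```cpp
#include "llvm/BinaryFormat/ELF.h"

// Sketch: recover the numeric SM from e_flags. Post-Blackwell objects
// store it in bits 8-15 (EF_CUDA_SM_MASK); older ones in bits 0-7.
unsigned decodeCudaSM(unsigned EFlags) {
  if (unsigned SM = (EFlags & llvm::ELF::EF_CUDA_SM_MASK) >> 8)
    return SM; // e.g. EF_CUDA_SM100 (0x6400) decodes to 100
  return EFlags & llvm::ELF::EF_CUDA_SM;
}
```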
// ELF Relocation types for BPF
diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h
index 16d3b16..98dbe38 100644
--- a/llvm/include/llvm/BinaryFormat/SFrame.h
+++ b/llvm/include/llvm/BinaryFormat/SFrame.h
@@ -15,33 +15,36 @@
#ifndef LLVM_BINARYFORMAT_SFRAME_H
#define LLVM_BINARYFORMAT_SFRAME_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Endian.h"
-namespace llvm::sframe {
+namespace llvm {
+
+template <typename T> struct EnumEntry;
+
+namespace sframe {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
constexpr uint16_t Magic = 0xdee2;
enum class Version : uint8_t {
- V1 = 1,
- V2 = 2,
+#define HANDLE_SFRAME_VERSION(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
};
enum class Flags : uint8_t {
- FDESorted = 0x01,
- FramePointer = 0x02,
- FDEFuncStartPCRel = 0x04,
+#define HANDLE_SFRAME_FLAG(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
V2AllFlags = FDESorted | FramePointer | FDEFuncStartPCRel,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xff),
};
enum class ABI : uint8_t {
- AArch64EndianBig = 1,
- AArch64EndianLittle = 2,
- AMD64EndianLittle = 3,
+#define HANDLE_SFRAME_ABI(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
};
/// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info.
@@ -160,6 +163,11 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>;
template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>;
template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>;
-} // namespace llvm::sframe
+ArrayRef<EnumEntry<Version>> getVersions();
+ArrayRef<EnumEntry<Flags>> getFlags();
+ArrayRef<EnumEntry<ABI>> getABIs();
+
+} // namespace sframe
+} // namespace llvm
#endif // LLVM_BINARYFORMAT_SFRAME_H
diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
new file mode 100644
index 0000000..643b15f
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
@@ -0,0 +1,39 @@
+//===- SFrameConstants.def --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) || \
+ defined(HANDLE_SFRAME_ABI))
+#error "Missing HANDLE_SFRAME definition"
+#endif
+
+#ifndef HANDLE_SFRAME_VERSION
+#define HANDLE_SFRAME_VERSION(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_FLAG
+#define HANDLE_SFRAME_FLAG(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_ABI
+#define HANDLE_SFRAME_ABI(CODE, NAME)
+#endif
+
+HANDLE_SFRAME_VERSION(0x01, V1)
+HANDLE_SFRAME_VERSION(0x02, V2)
+
+HANDLE_SFRAME_FLAG(0x01, FDESorted)
+HANDLE_SFRAME_FLAG(0x02, FramePointer)
+HANDLE_SFRAME_FLAG(0x04, FDEFuncStartPCRel)
+
+HANDLE_SFRAME_ABI(0x01, AArch64EndianBig)
+HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle)
+HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle)
+
+#undef HANDLE_SFRAME_VERSION
+#undef HANDLE_SFRAME_FLAG
+#undef HANDLE_SFRAME_ABI
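The .def follows the usual X-macro pattern: a consumer defines only the HANDLE_* macros it needs before including the file, and the rest default to no-ops. A minimal sketch of building one of the EnumEntry tables declared in SFrame.h (the exact table contents are an assumption):

```cpp
#include "llvm/Support/ScopedPrinter.h" // llvm::EnumEntry

// Expand only the ABI entries into a name/value table.
static const llvm::EnumEntry<llvm::sframe::ABI> ABIEntries[] = {
#define HANDLE_SFRAME_ABI(CODE, NAME) {#NAME, llvm::sframe::ABI::NAME},
#include "llvm/BinaryFormat/SFrameConstants.def"
};
```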
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index da73238..490d1a3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -103,6 +103,20 @@ public:
/// \return The known alignment for the pointer-like value \p R.
Align computeKnownAlignment(Register R, unsigned Depth = 0);
+ /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+ /// that are all less than the element bit-width of the shift node, return the
+ /// valid constant range.
+ std::optional<ConstantRange>
+ getValidShiftAmountRange(Register R, const APInt &DemandedElts,
+ unsigned Depth);
+
+ /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+ /// that are all less than the element bit-width of the shift node, return the
+ /// minimum possible value.
+ std::optional<uint64_t> getValidMinimumShiftAmount(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
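A hedged usage sketch of the new query from a combine; the surrounding names are illustrative:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include <optional>

// Sketch: only fold when every demanded lane's shift amount is in range.
void combineShl(llvm::GISelValueTracking &VT, llvm::Register ShAmt,
                unsigned NumElts) {
  llvm::APInt DemandedElts = llvm::APInt::getAllOnes(NumElts);
  if (std::optional<uint64_t> Min =
          VT.getValidMinimumShiftAmount(ShAmt, DemandedElts)) {
    // Every shift amount is < bit-width and >= *Min, so a G_SHL here is
    // guaranteed to clear at least *Min low bits.
  }
}
```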
/// Determine which floating-point classes are valid for \p V, and return them
/// in KnownFPClass bit sets.
///
diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index c22f9d4..c70413d 100644
--- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -15,19 +15,17 @@
#define LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
#include "llvm/IR/BuiltinGCs.h"
-#include <cstdlib>
+#include "llvm/Support/AlwaysTrue.h"
namespace {
struct ForceAsmWriterLinking {
ForceAsmWriterLinking() {
// We must reference the plug-ins in such a way that compilers will not
// delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char*) -1)
+ // yet is effectively a NO-OP. This is so that globals in the translation
+ // units where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
llvm::linkOcamlGCPrinter();
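The new header centralizes the trick. A plausible shape for getNonFoldableAlwaysTrue, as a sketch only (the real implementation in AlwaysTrue.h may differ), is an always-true read the optimizer cannot fold:

```cpp
// Hedged sketch of llvm/Support/AlwaysTrue.h: returns true, but through a
// volatile read that is opaque to the optimizer, so the references guarded
// by it above cannot be discarded as dead code.
namespace llvm {
inline bool getNonFoldableAlwaysTrue() {
  static volatile bool True = true;
  return True;
}
} // namespace llvm
```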
diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 6f56682..f0a01d2 100644
--- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -16,20 +16,18 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Target/TargetMachine.h"
-#include <cstdlib>
namespace {
struct ForceCodegenLinking {
ForceCodegenLinking() {
// We must reference the passes in such a way that compilers will not
// delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char*) -1)
+ // yet is effectively a NO-OP. This is so that globals in the translation
+ // units where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
(void) llvm::createFastRegisterAllocator();
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index d324236..ebf7534 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_MACHINEINSTRBUNDLE_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -294,6 +295,12 @@ LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI,
Register Reg,
const TargetRegisterInfo *TRI);
+class FinalizeBundleTestPass : public PassInfoMixin<FinalizeBundleTestPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index e7a7091..efda7eb 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -65,7 +65,7 @@
//
// void <SubTarget>Subtarget::
// overrideSchedPolicy(MachineSchedPolicy &Policy,
-// unsigned NumRegionInstrs) const {
+// const SchedRegion &Region) const {
// Policy.<Flag> = true;
// }
//
@@ -218,6 +218,22 @@ struct MachineSchedPolicy {
MachineSchedPolicy() = default;
};
+/// A region of an MBB for scheduling.
+struct SchedRegion {
+ /// RegionBegin is the first instruction in the scheduling region, and
+ /// RegionEnd is either MBB->end() or the scheduling boundary after the
+ /// last instruction in the scheduling region. These iterators cannot refer
+ /// to instructions outside of the identified scheduling region because
+ /// those may be reordered before scheduling this region.
+ MachineBasicBlock::iterator RegionBegin;
+ MachineBasicBlock::iterator RegionEnd;
+ unsigned NumRegionInstrs;
+
+ SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
+ unsigned N)
+ : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
+};
+
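With the whole region available, policy overrides can key off more than the instruction count. A hedged sketch of a subtarget override under the new signature; the heuristic and target name are illustrative:

```cpp
// Sketch: disable pressure tracking for tiny regions.
void MySubtarget::overrideSchedPolicy(llvm::MachineSchedPolicy &Policy,
                                      const llvm::SchedRegion &Region) const {
  if (Region.NumRegionInstrs < 16)
    Policy.ShouldTrackPressure = false;
}
```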
/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 714285e..095a40e 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -438,10 +438,6 @@ LLVM_ABI extern char &UnpackMachineBundlesID;
LLVM_ABI FunctionPass *
createUnpackMachineBundles(std::function<bool(const MachineFunction &)> Ftor);
-/// FinalizeMachineBundles - This pass finalize machine instruction
-/// bundles (created earlier, e.g. during pre-RA scheduling).
-LLVM_ABI extern char &FinalizeMachineBundlesID;
-
/// StackMapLiveness - This pass analyses the register live-out set of
/// stackmap/patchpoint intrinsics and attaches the calculated information to
/// the intrinsic for later emission to the StackMap.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 657951d..eac8e14 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1202,13 +1202,16 @@ public:
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL,
- ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops);
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops,
+ const SDNodeFlags Flags);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
// Use flags from current flag inserter.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops);
+ LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL,
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops);
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -1346,9 +1349,10 @@ public:
/// Helper function to make it easier to build SelectCC's if you just have an
/// ISD::CondCode instead of an SDValue.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True,
- SDValue False, ISD::CondCode Cond) {
+ SDValue False, ISD::CondCode Cond,
+ SDNodeFlags Flags = SDNodeFlags()) {
return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True,
- False, getCondCode(Cond));
+ False, getCondCode(Cond), Flags);
}
/// Try to simplify a select/vselect into 1 of its operands or a constant.
@@ -1425,10 +1429,9 @@ public:
/// Creates a LifetimeSDNode that starts (`IsStart==true`) or ends
/// (`IsStart==false`) the lifetime of the portion of `FrameIndex` between
- /// offsets `Offset` and `Offset + Size`.
+ /// offsets `0` and `Size`.
LLVM_ABI SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
- int FrameIndex, int64_t Size,
- int64_t Offset = -1);
+ int FrameIndex, int64_t Size);
/// Creates a PseudoProbeSDNode with function GUID `Guid` and
/// the index of the block `Index` it is probing, as well as the attributes
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 5d9937f..8e9c1f7 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -2004,25 +2004,17 @@ public:
class LifetimeSDNode : public SDNode {
friend class SelectionDAG;
int64_t Size;
- int64_t Offset; // -1 if offset is unknown.
LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
- SDVTList VTs, int64_t Size, int64_t Offset)
- : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
+ SDVTList VTs, int64_t Size)
+ : SDNode(Opcode, Order, dl, VTs), Size(Size) {}
+
public:
int64_t getFrameIndex() const {
return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
}
- bool hasOffset() const { return Offset >= 0; }
- int64_t getOffset() const {
- assert(hasOffset() && "offset is unknown");
- return Offset;
- }
- int64_t getSize() const {
- assert(hasOffset() && "offset is unknown");
- return Size;
- }
+ int64_t getSize() const { return Size; }
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 1a548a5..cbdc1b6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3219,25 +3219,19 @@ public:
/// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
- /// \p SI is the vector store instruction.
+ /// \p SI is the vector store instruction. Can be either a plain store
+ /// or a vp.store.
+ /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+ /// component being interwoven) mask. Can be nullptr, in which case the
+ /// result is unconditional.
/// \p SVI is the shufflevector to RE-interleave the stored vector.
/// \p Factor is the interleave factor.
- virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ virtual bool lowerInterleavedStore(Instruction *Store, Value *Mask,
+ ShuffleVectorInst *SVI,
unsigned Factor) const {
return false;
}
- /// Lower an interleaved store to target specific intrinsics. Return
- /// true on success.
- ///
- /// \p Store is the vp.store instruction.
- /// \p Mask is a mask value
- /// \p InterleaveOps is a list of values being interleaved.
- virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOps) const {
- return false;
- }
-
/// Lower a deinterleave intrinsic to a target specific load intrinsic.
/// Return true on success. Currently only supports
/// llvm.vector.deinterleave{2,3,5,7}
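A hedged sketch of a target override under the unified signature, which now covers both plain stores and vp.stores through the optional mask; the lowering body is elided:

```cpp
// Sketch only: MyTargetLowering is a hypothetical target.
bool MyTargetLowering::lowerInterleavedStore(llvm::Instruction *Store,
                                             llvm::Value *Mask,
                                             llvm::ShuffleVectorInst *SVI,
                                             unsigned Factor) const {
  if (Factor != 2)
    return false;
  if (Mask) {
    // Masked (vp.store) path: Mask has one lane per interleaved segment.
  }
  // ... emit the target's interleaved-store intrinsic here ...
  return true;
}
```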
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 45e67d8..a8c7a8a 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -54,6 +54,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetSchedModel;
class Triple;
+struct SchedRegion;
//===----------------------------------------------------------------------===//
///
@@ -231,7 +232,7 @@ public:
/// scheduling heuristics (no custom MachineSchedStrategy) to make
/// changes to the generic scheduling policy.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {}
+ const SchedRegion &Region) const {}
/// Override generic post-ra scheduling policy within a region.
///
@@ -241,7 +242,7 @@ public:
/// Note that some options like tracking register pressure won't take effect
/// in post-ra scheduling.
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {}
+ const SchedRegion &Region) const {}
// Perform target-specific adjustments to the latency of a schedule
// dependency.
diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h
index c836c06..1e035c0 100644
--- a/llvm/include/llvm/ExecutionEngine/MCJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h
@@ -15,8 +15,8 @@
#define LLVM_EXECUTIONENGINE_MCJIT_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Support/Compiler.h"
-#include <cstdlib>
extern "C" LLVM_ABI void LLVMLinkInMCJIT();
@@ -24,13 +24,11 @@ namespace {
struct ForceMCJITLinking {
ForceMCJITLinking() {
// We must reference MCJIT in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char*) -1)
+ // delete it all as dead code, even with whole program optimization, yet
+ // is effectively a NO-OP. This is so that globals in the translation
+ // units where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
LLVMLinkInMCJIT();
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index de888ff..7919f7a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -779,16 +779,17 @@ struct LinkT {
template <typename T, typename I, typename E> //
struct MapT {
using LocatorList = ObjectListT<I, E>;
- ENUM(MapType, To, From, Tofrom, Alloc, Release, Delete);
- ENUM(MapTypeModifier, Always, Close, Present, OmpxHold);
+ ENUM(MapType, To, From, Tofrom, Storage);
+ ENUM(MapTypeModifier, Always, Close, Delete, Present, Self, OmpxHold);
+ ENUM(RefModifier, RefPtee, RefPtr, RefPtrPtee);
// See note at the definition of the MapperT type.
using Mappers = ListT<type::MapperT<I, E>>; // Not a spec name
using Iterator = type::IteratorT<T, I, E>;
using MapTypeModifiers = ListT<MapTypeModifier>; // Not a spec name
using TupleTrait = std::true_type;
- std::tuple<OPT(MapType), OPT(MapTypeModifiers), OPT(Mappers), OPT(Iterator),
- LocatorList>
+ std::tuple<OPT(MapType), OPT(MapTypeModifiers), OPT(RefModifier),
+ OPT(Mappers), OPT(Iterator), LocatorList>
t;
};
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index 611bfe3..047baa3 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -708,6 +708,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
{/*MapType=*/MapType::Tofrom,
/*MapTypeModifier=*/std::nullopt,
+ /*RefModifier=*/std::nullopt,
/*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
/*LocatorList=*/std::move(tofrom)}});
dirTarget->clauses.push_back(map);
@@ -969,8 +970,8 @@ bool ConstructDecompositionT<C, H>::applyClause(
llvm::omp::Clause::OMPC_map,
tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
{/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt,
- /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
- /*LocatorList=*/std::move(tofrom)}});
+ /*RefModifier=*/std::nullopt, /*Mapper=*/std::nullopt,
+ /*Iterator=*/std::nullopt, /*LocatorList=*/std::move(tofrom)}});
dirTarget->clauses.push_back(map);
applied = true;
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index d68491e..ef761eb 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -284,6 +284,9 @@ namespace CallingConv {
RISCV_VLSCall_32768 = 122,
RISCV_VLSCall_65536 = 123,
+ // Calling convention for AMDGPU whole wave functions.
+ AMDGPU_Gfx_WholeWave = 124,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
@@ -294,8 +297,13 @@ namespace CallingConv {
/// directly or indirectly via a call-like instruction.
constexpr bool isCallableCC(CallingConv::ID CC) {
switch (CC) {
+ // Called with special intrinsics:
+ // llvm.amdgcn.cs.chain
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
+ // llvm.amdgcn.call.whole.wave
+ case CallingConv::AMDGPU_Gfx_WholeWave:
+ // Hardware entry points:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_GS:
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index f8241a3..c529a86 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -39,30 +39,26 @@ class DbgVariableRecord;
class Instruction;
class Module;
-/// Finds dbg.declare intrinsics declaring local variables as living in the
+/// Finds dbg.declare records declaring local variables as living in the
/// memory that 'V' points to.
-LLVM_ABI TinyPtrVector<DbgDeclareInst *> findDbgDeclares(Value *V);
-/// As above, for DVRDeclares.
LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRDeclares(Value *V);
/// As above, for DVRValues.
LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRValues(Value *V);
-/// Finds the llvm.dbg.value intrinsics describing a value.
-LLVM_ABI void findDbgValues(
- SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V,
- SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
-
-/// Finds the debug info intrinsics describing a value.
-LLVM_ABI void findDbgUsers(
- SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V,
- SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
+/// Finds the debug info records describing a value.
+LLVM_ABI void
+findDbgUsers(Value *V,
+ SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
+/// Finds the dbg.values describing a value.
+LLVM_ABI void
+findDbgValues(Value *V,
+ SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
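A hedged migration sketch: callers that previously collected intrinsic users now work purely in terms of DbgVariableRecords:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugProgramInstruction.h"

// Sketch: retarget all debug records describing V to NewV.
void updateDebugUsers(llvm::Value *V, llvm::Value *NewV) {
  llvm::SmallVector<llvm::DbgVariableRecord *> DVRs;
  llvm::findDbgUsers(V, DVRs);
  for (llvm::DbgVariableRecord *DVR : DVRs)
    DVR->replaceVariableLocationOp(V, NewV);
}
```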
/// Find subprogram that is enclosing this scope.
LLVM_ABI DISubprogram *getDISubprogram(const MDNode *Scope);
/// Produce a DebugLoc to use for each dbg.declare that is promoted to a
/// dbg.value.
-LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII);
LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableRecord *DVR);
/// Strip debug info in the module if it exists.
@@ -192,13 +188,6 @@ using AssignmentInstRange =
/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
/// instruction (including by deleting or cloning instructions).
LLVM_ABI AssignmentInstRange getAssignmentInsts(DIAssignID *ID);
-/// Return a range of instructions (typically just one) that perform the
-/// assignment that \p DAI encodes.
-/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
-/// instruction (including by deleting or cloning instructions).
-inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) {
- return getAssignmentInsts(DAI->getAssignID());
-}
inline AssignmentInstRange getAssignmentInsts(const DbgVariableRecord *DVR) {
assert(DVR->isDbgAssign() &&
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ecda6c4..8bfa345 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3717,6 +3717,20 @@ class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> :
IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+class AMDGPUWmmaIntrinsicModsC_MatrixFMT :
+ Intrinsic<
+ [llvm_anyfloat_ty], // %D
+ [
+ llvm_i32_ty, // matrix_a_fmt
+ llvm_anyint_ty, // %A
+ llvm_i32_ty, // matrix_b_fmt
+ llvm_anyint_ty, // %B
+ llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
+ LLVMMatchType<0>, // %C
+ ],
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+>;
+
defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = {
def int_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
@@ -3741,6 +3755,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint
def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT;
def int_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0375f29..967d166 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -331,6 +331,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gf,"m8n16:x2") : !listsplat(llvm_i32_ty, 2),
!eq(gf,"m8n16:x4") : !listsplat(llvm_i32_ty, 4),
+ // stmatrix b8 -> s32 @ m16n8
+ !eq(gf,"m16n8:x1") : !listsplat(llvm_i32_ty, 1),
+ !eq(gf,"m16n8:x2") : !listsplat(llvm_i32_ty, 2),
+ !eq(gf,"m16n8:x4") : !listsplat(llvm_i32_ty, 4),
+
);
}
@@ -403,6 +408,17 @@ class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
!subst("llvm.", "int_", intr));
}
+class STMATRIX_NAME<WMMA_REGS Frag, int Trans> {
+ string intr = "llvm.nvvm.stmatrix.sync.aligned"
+ # "." # Frag.geom
+ # "." # Frag.frag
+ # !if(Trans, ".trans", "")
+ # "." # Frag.ptx_elt_type
+ ;
+ string record = !subst(".", "_",
+ !subst("llvm.", "int_", intr));
+}
+
// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
// Geom: list of supported geometries.
// TypeN: PTX type of the corresponding fragment's element.
@@ -443,6 +459,16 @@ class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
list<string> ops = !foreach(x, ret, x.gft);
}
+class STMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+ list<WMMA_REGS> ret =
+ !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+ !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+ [WMMA_REGS<geom, frag, type>]))))));
+ // Debugging aid for readable representation of the list above.
+ list<string> ops = !foreach(x, ret, x.gft);
+}
+
// Creates list of valid combinations of fragments. This is the main list that
// drives generation of corresponding intrinsics and instructions.
class NVVM_MMA_OPS {
@@ -537,9 +563,18 @@ class NVVM_MMA_OPS {
list<WMMA_REGS> ldmatrix_geom_m8n16_ops = LDMATRIX_OPS<
["m8n16"], ["x1", "x2", "x4"], ["b8x16.b6x16_p32", "b8x16.b4x16_p64"]>.ret;
+ list<WMMA_REGS> stmatrix_b16_ops = STMATRIX_OPS<
+ ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
+
+ list<WMMA_REGS> stmatrix_b8_ops = STMATRIX_OPS<
+ ["m16n8"], ["x1", "x2", "x4"], ["b8"]>.ret;
+
list<WMMA_REGS> all_ldmatrix_ops = !listconcat(ldmatrix_b16_ops,
ldmatrix_geom_m16n16_ops,
ldmatrix_geom_m8n16_ops);
+
+ list<WMMA_REGS> all_stmatrix_ops = !listconcat(stmatrix_b16_ops,
+ stmatrix_b8_ops);
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
@@ -680,6 +715,19 @@ class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
);
}
+// Returns true if the fragment is supported by stmatrix ops;
+// false otherwise.
+class NVVM_STMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
+ string g = frag.geom;
+ string t = frag.ptx_elt_type;
+
+ bit ret = !cond(
+ !and(!eq(g, "m8n8"), !eq(t, "b16")): true,
+ !and(!eq(g, "m16n8"), !eq(t, "b8"), !eq(trans, 1)): true,
+ true: false
+ );
+}
+
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Suffix = !if(sync, "sync_", "")
# mode # "_"
@@ -1969,6 +2017,23 @@ foreach transposed = [0, 1] in {
}
}
+// STMATRIX
+class NVVM_STMATRIX<WMMA_REGS Frag, int Transposed>
+ : Intrinsic<[],
+ !listconcat([llvm_anyptr_ty], Frag.regs),
+ [IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
+ WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ STMATRIX_NAME<Frag, Transposed>.intr>;
+
+foreach transposed = [0, 1] in {
+ foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in {
+ if NVVM_STMATRIX_SUPPORTED<frag, transposed>.ret then {
+ def STMATRIX_NAME<frag, transposed>.record
+ : NVVM_STMATRIX<frag, transposed>;
+ }
+ }
+}
+
// MAPA
let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in {
def int_nvvm_mapa
@@ -2024,9 +2089,7 @@ foreach dim = 1...5 in {
tensor_dim_args, // actual tensor dims
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent,
- ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
- NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
// Intrinsics for TMA Copy with reduction
foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in
@@ -2037,18 +2100,31 @@ foreach dim = 1...5 in {
tensor_dim_args, // actual tensor dims
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
- NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
}
}
+// TMA S2G tile::scatter4
+def int_nvvm_cp_async_bulk_tensor_s2g_tile_scatter4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_shared_ptr_ty, // src_smem_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // dims
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
// TMA Tensor Copy Intrinsics: G2S -> From Global to Shared memory variants
foreach dim = 1...5 in {
defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim);
- foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
+ foreach mode = !if(!ge(dim, 3), ["tile", "im2col", "im2col_w", "im2col_w_128"], ["tile"]) in {
defvar is_im2col = !eq(mode, "im2col");
- defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0);
+ defvar is_im2colw = !or(!eq(mode, "im2col_w"), !eq(mode, "im2col_w_128"));
+
+ // For im2col_w/w128 modes, the num_offsets is always 2.
+ // For im2col mode, the num_offsets is (dim - 2).
+ defvar num_im2col_offsets = !if(is_im2colw, 2, !if(is_im2col, !add(dim, -2), 0));
defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets);
defvar g2s_params = !listconcat(
@@ -2079,11 +2155,60 @@ foreach dim = 1...5 in {
im2col_offsets_args, // im2col offsets
[llvm_i64_ty]), // cache_hint
[llvm_i1_ty], // Flag for cache_hint
- [IntrConvergent,
- ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+ [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
+ def int_nvvm_cp_async_bulk_tensor_g2s_cta_ # mode # _ # dim # d :
+ DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_shared_ptr_ty, // dst_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ tensor_dim_args, // actual tensor dims
+ im2col_offsets_args, // im2col offsets
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
}
}
+// TMA copy for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_g2s_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat(
+ [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i16_ty, // cta_mask
+ llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty, // Flag for cta_mask
+ llvm_i1_ty, // Flag for cache_hint
+ llvm_i32_ty], // Flag for cta_group
+ [IntrConvergent,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
+ // Allowed values for cta_group are {0,1,2} i.e [0, 3).
+ Range<ArgIndex<12>, 0, 3>]>;
+
+def int_nvvm_cp_async_bulk_tensor_g2s_cta_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat(
+ [llvm_shared_ptr_ty, // dst_shared_ptr
+ llvm_shared_ptr_ty, // mbarrier_ptr
+ llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
+
+// TMA prefetch for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_prefetch_tile_gather4_2d
+ : DefaultAttrsIntrinsicFlags<[],
+ !listconcat([llvm_ptr_ty], // tensormap_ptr
+ !listsplat(llvm_i32_ty, 5), // co-ordinates
+ [llvm_i64_ty]), // cache_hint
+ [llvm_i1_ty], // Flag for cache_hint
+ [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
// Intrinsics for Prefetch and Prefetchu
let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>] in {
foreach level = ["L1", "L2"] in {
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index f592ff2..c1e4b97 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -43,6 +43,10 @@ def int_wasm_ref_is_null_exn :
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_exnref_ty], [IntrNoMem],
"llvm.wasm.ref.is_null.exn">;
+def int_wasm_ref_test_func
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// Table intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 737610b..0fd5de3 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
- return false;
}
inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
@@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
}
llvm_unreachable(
"Checking invalid f2i/d2i intrinsic for signed int conversion");
- return false;
}
inline APFloat::roundingMode
@@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
return APFloat::rmTowardZero;
}
llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
- return APFloat::roundingMode::Invalid;
}
inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
@@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
@@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
@@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
- return false;
+}
+
+inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return true;
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
+}
+
+inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return true;
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
+}
+
+inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ return APFloat::rmTowardNegative;
+
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return APFloat::rmNearestTiesToEven;
+
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ return APFloat::rmTowardPositive;
+
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
+}
+
+inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
+ if (ShouldFTZ)
+ return DenormalMode::getPreserveSign();
+ return DenormalMode::getIEEE();
}
} // namespace nvvm
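A hedged sketch of how a constant folder might combine the new helpers; the call site is illustrative:

```cpp
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/NVVMIntrinsicUtils.h"

// Sketch: fold rcp(x) as 1/x in the intrinsic's rounding mode, flushing
// a denormal input first when the FTZ variant asks for it.
llvm::APFloat foldRcp(llvm::APFloat Operand, llvm::Intrinsic::ID IID) {
  if (llvm::nvvm::RCPShouldFTZ(IID) && Operand.isDenormal())
    Operand = llvm::APFloat::getZero(Operand.getSemantics(),
                                     Operand.isNegative());
  llvm::APFloat One = llvm::APFloat::getOne(Operand.getSemantics());
  One.divide(Operand, llvm::nvvm::GetRCPRoundingMode(IID));
  return One;
}
```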
diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h
index 0315715..33eda5a 100644
--- a/llvm/include/llvm/IR/PassInstrumentation.h
+++ b/llvm/include/llvm/IR/PassInstrumentation.h
@@ -164,7 +164,7 @@ public:
/// Add a class name to pass name mapping for use by pass instrumentation.
LLVM_ABI void addClassToPassName(StringRef ClassName, StringRef PassName);
- /// Get the pass name for a given pass class name.
+ /// Get the pass name for a given pass class name. Empty if no match found.
LLVM_ABI StringRef getPassNameForClassName(StringRef ClassName);
private:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 50e50a9..27c5d5c 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -822,12 +822,52 @@ template <typename Class> struct bind_ty {
}
};
+/// Check whether the value has the given Class and matches the nested
+/// pattern. Capture it into the provided variable if successful.
+template <typename Class, typename MatchTy> struct bind_and_match_ty {
+ Class *&VR;
+ MatchTy Match;
+
+ bind_and_match_ty(Class *&V, const MatchTy &Match) : VR(V), Match(Match) {}
+
+ template <typename ITy> bool match(ITy *V) const {
+ auto *CV = dyn_cast<Class>(V);
+ if (CV && Match.match(V)) {
+ VR = CV;
+ return true;
+ }
+ return false;
+ }
+};
+
/// Match a value, capturing it if we match.
inline bind_ty<Value> m_Value(Value *&V) { return V; }
inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
+/// Match against the nested pattern, and capture the value if we match.
+template <typename MatchTy>
+inline bind_and_match_ty<Value, MatchTy> m_Value(Value *&V,
+ const MatchTy &Match) {
+ return {V, Match};
+}
+
+/// Match against the nested pattern, and capture the value if we match.
+template <typename MatchTy>
+inline bind_and_match_ty<const Value, MatchTy> m_Value(const Value *&V,
+ const MatchTy &Match) {
+ return {V, Match};
+}
+
/// Match an instruction, capturing it if we match.
inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
+/// Match against the nested pattern, and capture the instruction if we match.
+template <typename MatchTy>
+inline bind_and_match_ty<Instruction, MatchTy>
+m_Instruction(Instruction *&I, const MatchTy &Match) {
+ return {I, Match};
+}
+
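A usage sketch of the capture-and-match overloads, which replace the old two-step of matching and then dyn_casting the bound value:

```cpp
#include "llvm/IR/PatternMatch.h"

// Sketch: bind the Or operand only if it is itself a shift.
bool matchOrOfShl(llvm::Value *V, llvm::Value *&Shl) {
  using namespace llvm::PatternMatch;
  return match(V, m_c_Or(m_Value(Shl, m_Shl(m_Value(), m_Value())),
                         m_Value()));
}
```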
/// Match a unary operator, capturing it if we match.
inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
/// Match a binary operator, capturing it if we match.
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 2e231cf..31801da 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -119,7 +119,6 @@ LLVM_ABI void initializeExpandVariadicsPass(PassRegistry &);
LLVM_ABI void initializeExternalAAWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeFEntryInserterLegacyPass(PassRegistry &);
LLVM_ABI void initializeFinalizeISelPass(PassRegistry &);
-LLVM_ABI void initializeFinalizeMachineBundlesPass(PassRegistry &);
LLVM_ABI void initializeFixIrreduciblePass(PassRegistry &);
LLVM_ABI void initializeFixupStatepointCallerSavedLegacyPass(PassRegistry &);
LLVM_ABI void initializeFlattenCFGLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllIR.h b/llvm/include/llvm/LinkAllIR.h
index ceed784..894a8dd 100644
--- a/llvm/include/llvm/LinkAllIR.h
+++ b/llvm/include/llvm/LinkAllIR.h
@@ -21,6 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Memory.h"
@@ -29,19 +30,16 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
-#include <cstdlib>
namespace {
struct ForceVMCoreLinking {
ForceVMCoreLinking() {
// We must reference VMCore in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
+ // delete it all as dead code, even with whole program optimization.
// This is so that globals in the translation units where these functions
// are defined are forced to be initialized, populating various
// registries.
- if (std::getenv("bar") != (char*) -1)
+ if (llvm::getNonFoldableAlwaysTrue())
return;
llvm::LLVMContext Context;
(void)new llvm::Module("", Context);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index bae7f0d..f82a439 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/Support/AlwaysTrue.h"
#include "llvm/Support/Valgrind.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -54,14 +55,12 @@ class Triple;
namespace {
struct ForcePassLinking {
ForcePassLinking() {
- // We must reference the passes in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- // This is so that globals in the translation units where these functions
- // are defined are forced to be initialized, populating various
- // registries.
- if (std::getenv("bar") != (char *)-1)
+ // We must reference the passes in such a way that compilers will not delete
+ // it all as dead code, even with whole program optimization, yet is
+ // effectively a NO-OP. This is so that globals in the translation units
+ // where these functions are defined are forced to be initialized,
+ // populating various registries.
+ if (llvm::getNonFoldableAlwaysTrue())
return;
(void)llvm::createAtomicExpandLegacyPass();
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 0322cbe..bfc1175 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -18,9 +18,7 @@
namespace llvm {
-class MCAlignFragment;
class MCFragment;
-class MCLEBFragment;
class MCSymbol;
class MCAssembler;
class MCContext;
@@ -60,6 +58,9 @@ protected: // Can only create subclasses.
MCAssembler *Asm = nullptr;
+ bool AllowAutoPadding = false;
+ bool AllowEnhancedRelaxation = false;
+
public:
MCAsmBackend(const MCAsmBackend &) = delete;
MCAsmBackend &operator=(const MCAsmBackend &) = delete;
@@ -73,11 +74,11 @@ public:
/// Return true if this target might automatically pad instructions and thus
/// need to emit padding enable/disable directives around sensitive code.
- virtual bool allowAutoPadding() const { return false; }
+ bool allowAutoPadding() const { return AllowAutoPadding; }
/// Return true if this target allows an unrelaxable instruction to be
/// emitted into RelaxableFragment and then we can increase its size in a
/// tricky way for optimization.
- virtual bool allowEnhancedRelaxation() const { return false; }
+ bool allowEnhancedRelaxation() const { return AllowEnhancedRelaxation; }
/// lifetime management
virtual void reset() {}
@@ -105,21 +106,6 @@ public:
/// Get information on a fixup kind.
virtual MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const;
- /// Hook to check if extra nop bytes must be inserted for alignment directive.
- /// For some targets this may be necessary in order to support linker
- /// relaxation. The number of bytes to insert are returned in Size.
- virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
- unsigned &Size) {
- return false;
- }
-
- /// Hook which indicates if the target requires a fixup to be generated when
- /// handling an align directive in an executable section
- virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) {
- return false;
- }
-
// Evaluate a fixup, returning std::nullopt to use default handling for
// `Value` and `IsResolved`. Otherwise, returns `IsResolved` with the
// expectation that the hook updates `Value`.
@@ -177,6 +163,10 @@ public:
}
// Defined by linker relaxation targets.
+
+ // Return false to use default handling. Otherwise, set `Size` to the number
+ // of padding bytes.
+ virtual bool relaxAlign(MCFragment &F, unsigned &Size) { return false; }
virtual bool relaxDwarfLineAddr(MCFragment &, bool &WasRelaxed) const {
return false;
}
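relaxAlign subsumes the two hooks removed above. A hedged sketch of a linker-relaxation backend implementing it; the feature test and the getAlignment() accessor are assumptions:

```cpp
// Sketch only: MyAsmBackend is hypothetical.
bool MyAsmBackend::relaxAlign(llvm::MCFragment &F, unsigned &Size) {
  if (!LinkerRelaxEnabled) // assumed backend flag
    return false;          // use default handling
  // Worst-case padding: alignment minus one minimum-size instruction.
  Size = F.getAlignment().value() - 4; // getAlignment() assumed
  return true;
}
```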
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index ade9ee6f..467ad4e 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -36,8 +36,6 @@ class MCCVDefRangeFragment;
class MCCVInlineLineTableFragment;
class MCFragment;
class MCFixup;
-class MCLEBFragment;
-class MCPseudoProbeAddrFragment;
class MCSymbolRefExpr;
class raw_ostream;
class MCAsmBackend;
@@ -123,7 +121,6 @@ private:
bool relaxCVInlineLineTable(MCCVInlineLineTableFragment &DF);
bool relaxCVDefRange(MCCVDefRangeFragment &DF);
bool relaxFill(MCFillFragment &F);
- bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF);
public:
/// Construct a new assembler instance.
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 319e131..2ceeba2 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -54,7 +54,6 @@ class MCObjectStreamer : public MCStreamer {
void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &);
void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
- void emitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
protected:
MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
@@ -73,13 +72,6 @@ public:
MCSymbol *emitCFILabel() override;
void emitCFISections(bool EH, bool Debug) override;
- /// Get a data fragment to write into, creating a new one if the current
- /// fragment is not FT_Data.
- MCFragment *getOrCreateDataFragment();
-
-protected:
- bool changeSectionImpl(MCSection *Section, uint32_t Subsection);
-
public:
void visitUsedSymbol(const MCSymbol &Sym) override;
@@ -88,6 +80,15 @@ public:
/// \name MCStreamer Interface
/// @{
+ // Add a fragment with a variable-size tail and start a new empty fragment.
+ void insert(MCFragment *F);
+
+ // Add a new fragment to the current section without a variable-size tail.
+ void newFragment();
+
+ void appendContents(size_t Num, char Elt);
+ void addFixup(const MCExpr *Value, MCFixupKind Kind);
+
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
virtual void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment &F,
uint64_t Offset);
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 313071e..87a8349 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -91,8 +91,7 @@ private:
bool IsRegistered : 1;
bool IsText : 1;
-
- bool IsVirtual : 1;
+ bool IsBss : 1;
/// Whether the section contains linker-relaxable fragments. If true, the
/// offset between two locations may not be fully resolved.
@@ -113,7 +112,7 @@ protected:
StringRef Name;
SectionVariant Variant;
- MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual,
+ MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss,
MCSymbol *Begin);
// Protected non-virtual dtor prevents destroy through a base class pointer.
~MCSection() {}
@@ -178,9 +177,7 @@ public:
/// Check whether this section is "virtual", that is, it has no actual
/// object file contents.
- bool isVirtualSection() const { return IsVirtual; }
-
- virtual StringRef getVirtualSectionKind() const;
+ bool isBssSection() const { return IsBss; }
};
// Represents a contiguous piece of code or data within a section. Its size is
@@ -207,7 +204,6 @@ public:
FT_SymbolId,
FT_CVInlineLines,
FT_CVDefRange,
- FT_PseudoProbe,
};
private:
@@ -235,11 +231,16 @@ protected:
/// FT_Relaxable, x86-specific
bool AllowAutoPadding : 1;
+ // Track content and fixups for the fixed-size part as fragments are
+ // appended to the section. Once appended, the content is modified only
+ // by applyFixup.
uint32_t ContentStart = 0;
uint32_t ContentEnd = 0;
uint32_t FixupStart = 0;
uint32_t FixupEnd = 0;
+ // Track content and fixups for the optional variable-size tail part,
+ // typically modified during relaxation.
uint32_t VarContentStart = 0;
uint32_t VarContentEnd = 0;
uint32_t VarFixupStart = 0;
@@ -256,6 +257,19 @@ protected:
uint32_t OperandSize;
} relax;
struct {
+ // The alignment to ensure, in bytes.
+ Align Alignment;
+ // The size of the integer (in bytes) of \p Fill.
+ uint8_t FillLen;
+ // If true, fill with target-specific nop instructions.
+ bool EmitNops;
+ // The maximum number of bytes to emit; if the alignment
+ // cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit;
+ // Value to use for filling padding bytes.
+ int64_t Fill;
+ } align;
+ struct {
// True if this is a sleb128, false if uleb128.
bool IsSigned;
// The value this fragment should contain.
@@ -284,10 +298,10 @@ public:
return false;
case MCFragment::FT_Relaxable:
case MCFragment::FT_Data:
+ case MCFragment::FT_Align:
case MCFragment::FT_Dwarf:
case MCFragment::FT_DwarfFrame:
case MCFragment::FT_LEB:
- case MCFragment::FT_PseudoProbe:
case MCFragment::FT_CVInlineLines:
case MCFragment::FT_CVDefRange:
return true;
@@ -329,9 +343,9 @@ public:
bool getAllowAutoPadding() const { return AllowAutoPadding; }
void setAllowAutoPadding(bool V) { AllowAutoPadding = V; }
- // Content-related functions manage parent's storage using ContentStart and
+ //== Content-related functions manage parent's storage using ContentStart and
// ContentEnd.
- void clearContents() { ContentEnd = ContentStart; }
+
// Get a SmallVector reference. The caller should call doneAppending to update
// `ContentEnd`.
SmallVectorImpl<char> &getContentsForAppending() {
@@ -355,7 +369,6 @@ public:
getContentsForAppending().append(Num, Elt);
doneAppending();
}
- LLVM_ABI void setContents(ArrayRef<char> Contents);
MutableArrayRef<char> getContents() {
return MutableArrayRef(getParent()->ContentStorage)
.slice(ContentStart, ContentEnd - ContentStart);
@@ -387,7 +400,6 @@ public:
void clearFixups() { FixupEnd = FixupStart; }
LLVM_ABI void addFixup(MCFixup Fixup);
LLVM_ABI void appendFixups(ArrayRef<MCFixup> Fixups);
- LLVM_ABI void setFixups(ArrayRef<MCFixup> Fixups);
MutableArrayRef<MCFixup> getFixups() {
return MutableArrayRef(getParent()->FixupStorage)
.slice(FixupStart, FixupEnd - FixupStart);
@@ -442,7 +454,45 @@ public:
llvm::copy(Inst, S.begin() + u.relax.OperandStart);
}
+ //== FT_Align functions
+ void makeAlign(Align Alignment, int64_t Fill, uint8_t FillLen,
+ unsigned MaxBytesToEmit) {
+ Kind = FT_Align;
+ u.align.EmitNops = false;
+ u.align.Alignment = Alignment;
+ u.align.Fill = Fill;
+ u.align.FillLen = FillLen;
+ u.align.MaxBytesToEmit = MaxBytesToEmit;
+ }
+
+ Align getAlignment() const {
+ assert(Kind == FT_Align);
+ return u.align.Alignment;
+ }
+ int64_t getAlignFill() const {
+ assert(Kind == FT_Align);
+ return u.align.Fill;
+ }
+ uint8_t getAlignFillLen() const {
+ assert(Kind == FT_Align);
+ return u.align.FillLen;
+ }
+ unsigned getAlignMaxBytesToEmit() const {
+ assert(Kind == FT_Align);
+ return u.align.MaxBytesToEmit;
+ }
+ bool hasAlignEmitNops() const {
+ assert(Kind == FT_Align);
+ return u.align.EmitNops;
+ }
+
//== FT_LEB functions
+ void makeLEB(bool IsSigned, const MCExpr *Value) {
+ assert(Kind == FT_Data);
+ Kind = MCFragment::FT_LEB;
+ u.leb.IsSigned = IsSigned;
+ u.leb.Value = Value;
+ }
const MCExpr &getLEBValue() const {
assert(Kind == FT_LEB);
return *u.leb.Value;
@@ -455,10 +505,6 @@ public:
assert(Kind == FT_LEB);
return u.leb.IsSigned;
}
- void setLEBSigned(bool S) {
- assert(Kind == FT_LEB);
- u.leb.IsSigned = S;
- }
//== FT_DwarfFrame functions
const MCExpr &getDwarfAddrDelta() const {
@@ -487,52 +533,6 @@ protected:
: MCFragment(FType, HasInstructions) {}
};
-class MCAlignFragment : public MCFragment {
- /// Flag to indicate that (optimal) NOPs should be emitted instead
- /// of using the provided value. The exact interpretation of this flag is
- /// target dependent.
- bool EmitNops : 1;
-
- /// The alignment to ensure, in bytes.
- Align Alignment;
-
- /// The size of the integer (in bytes) of \p Value.
- uint8_t FillLen;
-
- /// The maximum number of bytes to emit; if the alignment
- /// cannot be satisfied in this width then this fragment is ignored.
- unsigned MaxBytesToEmit;
-
- /// Value to use for filling padding bytes.
- int64_t Fill;
-
- /// When emitting Nops some subtargets have specific nop encodings.
- const MCSubtargetInfo *STI = nullptr;
-
-public:
- MCAlignFragment(Align Alignment, int64_t Fill, uint8_t FillLen,
- unsigned MaxBytesToEmit)
- : MCFragment(FT_Align, false), EmitNops(false), Alignment(Alignment),
- FillLen(FillLen), MaxBytesToEmit(MaxBytesToEmit), Fill(Fill) {}
-
- Align getAlignment() const { return Alignment; }
- int64_t getFill() const { return Fill; }
- uint8_t getFillLen() const { return FillLen; }
- unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
-
- bool hasEmitNops() const { return EmitNops; }
- void setEmitNops(bool Value, const MCSubtargetInfo *STI) {
- EmitNops = Value;
- this->STI = STI;
- }
-
- const MCSubtargetInfo *getSubtargetInfo() const { return STI; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Align;
- }
-};
-
class MCFillFragment : public MCFragment {
uint8_t ValueSize;
/// Value to use for filling bytes.
@@ -730,22 +730,6 @@ public:
}
};
-class MCPseudoProbeAddrFragment : public MCEncodedFragment {
- /// The expression for the difference of the two symbols that
- /// make up the address delta between two .pseudoprobe directives.
- const MCExpr *AddrDelta;
-
-public:
- MCPseudoProbeAddrFragment(const MCExpr *AddrDelta)
- : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {}
-
- const MCExpr &getAddrDelta() const { return *AddrDelta; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_PseudoProbe;
- }
-};
-
inline MCSection::iterator &MCSection::iterator::operator++() {
F = F->Next;
return *this;
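[Editor's note: MCAlignFragment's state now lives in the fragment union, so an alignment directive can reuse the current fragment instead of allocating a separate node. A sketch of populating one; the call site and x86-style 0x90 fill byte are illustrative only:]

  static void emitAlignTo16(MCObjectStreamer &OS) {
    MCFragment *F = OS.getCurrentFragment();
    // Pad with single 0x90 bytes, emitting at most 16 bytes of padding.
    F->makeAlign(Align(16), /*Fill=*/0x90, /*FillLen=*/1,
                 /*MaxBytesToEmit=*/16);
    OS.newFragment(); // start a fresh FT_Data fragment
  }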
diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h
index 4472a12..f979413a 100644
--- a/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -82,7 +82,6 @@ public:
raw_ostream &OS,
uint32_t Subsection) const override;
bool useCodeAlign() const override;
- StringRef getVirtualSectionKind() const override;
unsigned getOrAssignWinCFISectionID(unsigned *NextID) const {
if (WinCFISectionID == ~0U)
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index f09d305..64a4daf 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -68,10 +68,6 @@ private:
Group.getPointer()->setIsSignature();
}
- // TODO Delete after we stop supporting generation of GNU-style .zdebug_*
- // sections.
- void setSectionName(StringRef Name) { this->Name = Name; }
-
public:
/// Decides whether a '.section' directive should be printed before the
/// section name
@@ -88,7 +84,6 @@ public:
raw_ostream &OS,
uint32_t Subsection) const override;
bool useCodeAlign() const override;
- StringRef getVirtualSectionKind() const override;
bool isUnique() const { return UniqueID != NonUniqueID; }
unsigned getUniqueID() const { return UniqueID; }
diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h
index 9e3f95e..b166397 100644
--- a/llvm/include/llvm/MC/MCSectionGOFF.h
+++ b/llvm/include/llvm/MC/MCSectionGOFF.h
@@ -111,7 +111,7 @@ public:
// Returns the text style for a section. Only defined for ED and PR sections.
GOFF::ESDTextStyle getTextStyle() const {
- assert((isED() || isPR() || isVirtualSection()) && "Expect ED or PR section");
+ assert((isED() || isPR() || isBssSection()) && "Expect ED, PR, or BSS section");
if (isED())
return EDAttributes.TextStyle;
if (isPR())
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 4b91dbc..4bfc8f9 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -259,6 +259,8 @@ class LLVM_ABI MCStreamer {
bool AllowAutoPadding = false;
protected:
+ bool IsObj = false;
+
// Symbol of the current epilog for which we are processing SEH directives.
WinEH::FrameInfo::Epilog *CurrentWinEpilog = nullptr;
@@ -270,6 +272,8 @@ protected:
/// section changes.
virtual void changeSection(MCSection *, uint32_t);
+ void addFragment(MCFragment *F);
+
virtual void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
virtual void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
@@ -308,6 +312,7 @@ public:
virtual void reset();
MCContext &getContext() const { return Context; }
+ bool isObj() const { return IsObj; }
// MCObjectStreamer has an MCAssembler and allows more expression folding at
// parse time.
@@ -425,10 +430,15 @@ public:
}
MCFragment *getCurrentFragment() const {
+ // Ensure consistency with the section stack.
assert(!getCurrentSection().first ||
CurFrag->getParent() == getCurrentSection().first);
+ // Ensure we eagerly allocate an empty fragment after adding a fragment
+ // with a variable-size tail.
+ assert(!CurFrag || CurFrag->getKind() == MCFragment::FT_Data);
return CurFrag;
}
+ size_t getCurFragOffset() const { return getCurrentFragment()->Offset; }
/// Save the current and previous section on the section stack.
void pushSection() {
SectionStack.push_back(
@@ -456,9 +466,6 @@ public:
MCSymbol *endSection(MCSection *Section);
- void insert(MCFragment *F);
- void newFragment();
-
/// Returns the mnemonic for \p MI, if the streamer has access to an
/// instruction printer and returns an empty string otherwise.
virtual StringRef getMnemonic(const MCInst &MI) const { return ""; }
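[Editor's note: the new assertions make getCurrentFragment() a safe append point. A sketch of a temporary section switch around it, assuming DebugSec is some MCSection* supplied by the caller:]

  static void emitInSection(MCStreamer &S, MCSection *DebugSec) {
    S.pushSection();           // save (section, subsection)
    S.switchSection(DebugSec);
    // ... emit data; getCurFragOffset() reports the running offset ...
    S.popSection();            // restore the previous section
  }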
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index a3aa0d9..ced1afd 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
case ELF::ELFOSABI_OPENBSD:
return Triple::OpenBSD;
case ELF::ELFOSABI_CUDA:
+ case ELF::ELFOSABI_CUDA_V2:
return Triple::CUDA;
case ELF::ELFOSABI_AMDGPU_HSA:
return Triple::AMDHSA;
diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h
new file mode 100644
index 0000000..cf4fe20
--- /dev/null
+++ b/llvm/include/llvm/Object/SFrameParser.h
@@ -0,0 +1,48 @@
+//===- SFrameParser.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_SFRAME_H
+#define LLVM_OBJECT_SFRAME_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/SFrame.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+
+namespace llvm {
+namespace object {
+
+template <endianness E> class SFrameParser {
+public:
+ static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents);
+
+ const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; }
+ const sframe::Header<E> &getHeader() const { return Header; }
+
+ bool usesFixedRAOffset() const {
+ return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle;
+ }
+ bool usesFixedFPOffset() const {
+ return false; // Not used in any currently defined ABI.
+ }
+
+private:
+ ArrayRef<uint8_t> Data;
+ const sframe::Header<E> &Header;
+
+ SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header)
+ : Data(Data), Header(Header) {}
+};
+
+extern template class SFrameParser<endianness::big>;
+extern template class SFrameParser<endianness::little>;
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_SFRAME_H
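[Editor's note: a usage sketch for the new parser, assuming Data holds the raw contents of an .sframe section from a little-endian object file:]

  static Error inspectSFrame(ArrayRef<uint8_t> Data) {
    auto ParserOrErr =
        object::SFrameParser<endianness::little>::create(Data);
    if (!ParserOrErr)
      return ParserOrErr.takeError();
    // AMD64 is currently the only defined ABI with a fixed RA offset.
    if (ParserOrErr->usesFixedRAOffset())
      outs() << "fixed return-address offset\n";
    return Error::success();
  }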
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 732fdc7..bee2106 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
+MACHINE_FUNCTION_PASS("finalizebundle-test", FinalizeBundleTestPass())
MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass())
MACHINE_FUNCTION_PASS("init-undef", InitUndefPass())
MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
diff --git a/llvm/include/llvm/Support/AlwaysTrue.h b/llvm/include/llvm/Support/AlwaysTrue.h
new file mode 100644
index 0000000..b696856
--- /dev/null
+++ b/llvm/include/llvm/Support/AlwaysTrue.h
@@ -0,0 +1,25 @@
+//===--- AlwaysTrue.h - Helper for opaque truthy values --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ALWAYS_TRUE_H
+#define LLVM_SUPPORT_ALWAYS_TRUE_H
+
+#include <cstdlib>
+
+namespace llvm {
+inline bool getNonFoldableAlwaysTrue() {
+ // Some parts of the codebase require a "constant true value" used as a
+ // predicate. These cases require that even with LTO and static linking,
+ // it's not possible for the compiler to fold the value. As compilers
+ // aren't smart enough to know that getenv() never returns -1, this will do
+ // the job.
+ return std::getenv("LLVM_IGNORED_ENV_VAR") != (char *)-1;
+}
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_ALWAYS_TRUE_H
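[Editor's note: intended use, with an invented side-effecting callee:]

  #include "llvm/Support/AlwaysTrue.h"

  void sideEffectForTesting(); // hypothetical

  void keepAlive() {
    // Always taken at run time, yet the optimizer cannot fold the branch,
    // so the call below survives LTO and static linking.
    if (llvm::getNonFoldableAlwaysTrue())
      sideEffectForTesting();
  }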
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 7577792..b65a63b 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -78,6 +78,8 @@ def : GINodeEquiv<G_XOR, xor>;
def : GINodeEquiv<G_SHL, shl>;
def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
+def : GINodeEquiv<G_ABDS, abds>;
+def : GINodeEquiv<G_ABDU, abdu>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 59e8117..8e83b046 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -276,14 +276,14 @@ LLVM_ABI bool isX18ReservedByDefault(const Triple &TT);
// For a given set of feature names, which can be either target-features, or
// fmv-features metadata, expand their dependencies and then return a bitmask
// corresponding to the entries of AArch64::FeatPriorities.
-LLVM_ABI uint64_t getFMVPriority(ArrayRef<StringRef> Features);
+LLVM_ABI APInt getFMVPriority(ArrayRef<StringRef> Features);
// For a given set of FMV feature names, expand their dependencies and then
// return a bitmask corresponding to the entries of AArch64::CPUFeatures.
// The values in CPUFeatures are not bitmasks themselves, they are sequential
// (0, 1, 2, 3, ...). The resulting bitmask is used at runtime to test whether
// a certain FMV feature is available on the host.
-LLVM_ABI uint64_t getCpuSupportsMask(ArrayRef<StringRef> Features);
+LLVM_ABI APInt getCpuSupportsMask(ArrayRef<StringRef> Features);
LLVM_ABI void PrintSupportedExtensions();
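[Editor's note: since the result is now an APInt, callers compare priorities with APInt's unsigned predicates rather than built-in integer operators. A sketch; the helper and feature names are illustrative:]

  static bool hasHigherPriority(ArrayRef<StringRef> A, ArrayRef<StringRef> B) {
    // Assumes getFMVPriority returns equal-width masks, so ugt() is valid.
    return AArch64::getFMVPriority(A).ugt(AArch64::getFMVPriority(B));
  }
  // e.g. hasHigherPriority({"sve2"}, {"sve"})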
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index bb79d25..3f5f427 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -325,7 +325,6 @@ LLVM_ABI void salvageDebugInfo(Instruction &I);
/// Mark undef if salvaging cannot be completed.
LLVM_ABI void
salvageDebugInfoForDbgValues(Instruction &I,
- ArrayRef<DbgVariableIntrinsic *> Insns,
ArrayRef<DbgVariableRecord *> DPInsns);
/// Given an instruction \p I and DIExpression \p DIExpr operating on
diff --git a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
index f288bdf..e0cdcf8 100644
--- a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
+++ b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
@@ -57,7 +57,6 @@ struct AllocaInfo {
struct StackInfo {
MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
- SmallVector<Instruction *, 4> UnrecognizedLifetimes;
SmallVector<Instruction *, 8> RetVec;
bool CallsReturnTwice = false;
};