52 files changed, 740 insertions, 332 deletions
diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h
index 6100aa9..bbdbd9b 100644
--- a/llvm/include/llvm/ADT/CombinationGenerator.h
+++ b/llvm/include/llvm/ADT/CombinationGenerator.h
@@ -118,10 +118,9 @@ public:
       : VariablesChoices(VariablesChoices_) {
 #ifndef NDEBUG
     assert(!VariablesChoices.empty() && "There should be some variables.");
-    llvm::for_each(VariablesChoices, [](ArrayRef<choice_type> VariableChoices) {
+    for (ArrayRef<choice_type> VariableChoices : VariablesChoices)
       assert(!VariableChoices.empty() &&
              "There must always be some choice, at least a placeholder one.");
-    });
 #endif
   }
 
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index b1009f8..1a2331c 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -218,12 +218,12 @@ public:
   /// insert - Insert a new value into the union/find set, ignoring the request
   /// if the value already exists.
   const ECValue &insert(const ElemTy &Data) {
-    auto I = TheMapping.insert({Data, nullptr});
-    if (!I.second)
-      return *I.first->second;
+    auto [I, Inserted] = TheMapping.try_emplace(Data);
+    if (!Inserted)
+      return *I->second;
 
     auto *ECV = new (ECValueAllocator) ECValue(Data);
-    I.first->second = ECV;
+    I->second = ECV;
     Members.push_back(ECV);
     return *ECV;
   }
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 7bd2c87..81b9a68 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -55,21 +55,13 @@ using type_identity_t // NOLINT(readability-identifier-naming)
 
 // TODO: Remove this in favor of std::optional<T>::transform once we switch to
 // C++23.
-template <typename T, typename Function>
-auto transformOptional(const std::optional<T> &O, const Function &F)
-    -> std::optional<decltype(F(*O))> {
-  if (O)
-    return F(*O);
-  return std::nullopt;
-}
-
-// TODO: Remove this in favor of std::optional<T>::transform once we switch to
-// C++23.
-template <typename T, typename Function>
-auto transformOptional(std::optional<T> &&O, const Function &F)
-    -> std::optional<decltype(F(*std::move(O)))> {
-  if (O)
-    return F(*std::move(O));
+template <typename Optional, typename Function,
+          typename Value = typename llvm::remove_cvref_t<Optional>::value_type>
+std::optional<std::invoke_result_t<Function, Value>>
+transformOptional(Optional &&O, Function &&F) {
+  if (O) {
+    return F(*std::forward<Optional>(O));
+  }
   return std::nullopt;
 }
 
diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h
index c089a07..575b3c9 100644
--- a/llvm/include/llvm/ADT/StringTable.h
+++ b/llvm/include/llvm/ADT/StringTable.h
@@ -118,6 +118,13 @@ public:
     constexpr Iterator(const Iterator &RHS) = default;
     constexpr Iterator(Iterator &&RHS) = default;
 
+    Iterator &operator=(const Iterator &RHS) {
+      Table = RHS.Table;
+      O = RHS.O;
+      S = RHS.S;
+      return *this;
+    }
+
     bool operator==(const Iterator &RHS) const {
       assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
       return O == RHS.O;
@@ -132,6 +139,8 @@ public:
       O = O.value() + (*Table)[O].size() + 1;
       return *this;
     }
+
+    Offset offset() const { return O; }
   };
 
   constexpr Iterator begin() const { return Iterator(*this, 0); }
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 3d7edf0..d87457c 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -170,6 +170,10 @@ public:
   unsigned getDimension() const;
   size_t size() const;
 
+  static size_t expectedSize() {
+    return MaxOpcodes + MaxTypeIDs + MaxOperandKinds;
+  }
+
   /// Helper function to get vocabulary key for a given Opcode
   static StringRef getVocabKeyForOpcode(unsigned Opcode);
 
@@ -182,6 +186,11 @@ public:
   /// Helper function to classify an operand into OperandKind
   static OperandKind getOperandKind(const Value *Op);
 
+  /// Helpers to return the IDs of a given Opcode, TypeID, or OperandKind
+  static unsigned getNumericID(unsigned Opcode);
+  static unsigned getNumericID(Type::TypeID TypeID);
+  static unsigned getNumericID(const Value *Op);
+
   /// Accessors to get the embedding for a given entity.
   const ir2vec::Embedding &operator[](unsigned Opcode) const;
   const ir2vec::Embedding &operator[](Type::TypeID TypeId) const;
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index b985292..1dc7320 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
   FMul,     ///< Product of floats.
   FMin,     ///< FP min implemented in terms of select(cmp()).
   FMax,     ///< FP max implemented in terms of select(cmp()).
+  FMinNum,  ///< FP min with llvm.minnum semantics including NaNs.
+  FMaxNum,  ///< FP max with llvm.maxnum semantics including NaNs.
   FMinimum, ///< FP min with llvm.minimum semantics
   FMaximum, ///< FP max with llvm.maximum semantics
   FMinimumNum, ///< FP min with llvm.minimumnum semantics
@@ -250,6 +252,7 @@ public:
   /// Returns true if the recurrence kind is a floating-point min/max kind.
   static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
     return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+           Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
            Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
            Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
   }
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index b042a71..571caf9 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -102,6 +102,12 @@ private:
   // The maximum size of a cold allocation context, from the profile summary.
   uint64_t MaxColdSize;
 
+  // Tracks whether we have built the Trie from existing MD_memprof metadata. We
+  // apply different heuristics for determining whether to discard non-cold
+  // contexts when rebuilding as we have lost information available during the
+  // original profile match.
+  bool BuiltFromExistingMetadata = false;
+
   void deleteTrieNode(CallStackTrieNode *Node) {
     if (!Node)
       return;
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index af1e0d7..9a2773c 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -24,6 +24,7 @@
 
 namespace llvm {
 class TargetLibraryInfo;
+class IntrinsicInst;
 
 /// The Vector Function Database.
 ///
@@ -188,6 +189,10 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
 /// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
 LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);
 
+/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
+/// factor.
+LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);
+
 /// Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it were inserted then extracted
 /// from the vector.
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index c7e4bdf..a2311d2 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -181,6 +181,7 @@ enum Kind {
   kw_amdgpu_cs_chain_preserve,
   kw_amdgpu_kernel,
   kw_amdgpu_gfx,
+  kw_amdgpu_gfx_whole_wave,
   kw_tailcc,
   kw_m68k_rtdcc,
   kw_graalcc,
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6bf2e17..e4f82ad 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1159,6 +1159,7 @@ enum : unsigned {
   SHT_LLVM_OFFLOADING = 0x6fff4c0b,         // LLVM device offloading data.
   SHT_LLVM_LTO = 0x6fff4c0c,                // .llvm.lto for fat LTO.
   SHT_LLVM_JT_SIZES = 0x6fff4c0d,           // LLVM jump tables sizes.
+  SHT_LLVM_CFI_JUMP_TABLE = 0x6fff4c0e,     // LLVM CFI jump table.
   // Android's experimental support for SHT_RELR sections.
   // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
   SHT_ANDROID_RELR = 0x6fffff00,   // Relocation entries; only offsets.
diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h
index 16d3b16..98dbe38 100644
--- a/llvm/include/llvm/BinaryFormat/SFrame.h
+++ b/llvm/include/llvm/BinaryFormat/SFrame.h
@@ -15,33 +15,36 @@
 #ifndef LLVM_BINARYFORMAT_SFRAME_H
 #define LLVM_BINARYFORMAT_SFRAME_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Endian.h"
 
-namespace llvm::sframe {
+namespace llvm {
+
+template <typename T> struct EnumEntry;
+
+namespace sframe {
 
 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
 
 constexpr uint16_t Magic = 0xdee2;
 
 enum class Version : uint8_t {
-  V1 = 1,
-  V2 = 2,
+#define HANDLE_SFRAME_VERSION(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
 };
 
 enum class Flags : uint8_t {
-  FDESorted = 0x01,
-  FramePointer = 0x02,
-  FDEFuncStartPCRel = 0x04,
+#define HANDLE_SFRAME_FLAG(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
   V2AllFlags = FDESorted | FramePointer | FDEFuncStartPCRel,
   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xff),
 };
 
 enum class ABI : uint8_t {
-  AArch64EndianBig = 1,
-  AArch64EndianLittle = 2,
-  AMD64EndianLittle = 3,
+#define HANDLE_SFRAME_ABI(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/SFrameConstants.def"
 };
 
 /// SFrame FRE Types. Bits 0-3 of FuncDescEntry.Info.
@@ -160,6 +163,11 @@ template <endianness E> using FrameRowEntryAddr1 = FrameRowEntry<uint8_t, E>;
 template <endianness E> using FrameRowEntryAddr2 = FrameRowEntry<uint16_t, E>;
 template <endianness E> using FrameRowEntryAddr4 = FrameRowEntry<uint32_t, E>;
 
-} // namespace llvm::sframe
+ArrayRef<EnumEntry<Version>> getVersions();
+ArrayRef<EnumEntry<Flags>> getFlags();
+ArrayRef<EnumEntry<ABI>> getABIs();
+
+} // namespace sframe
+} // namespace llvm
 
 #endif // LLVM_BINARYFORMAT_SFRAME_H
diff --git a/llvm/include/llvm/BinaryFormat/SFrameConstants.def b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
new file mode 100644
index 0000000..643b15f
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/SFrameConstants.def
@@ -0,0 +1,39 @@
+//===- SFrameConstants.def --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined(HANDLE_SFRAME_VERSION) || defined(HANDLE_SFRAME_FLAG) ||  \
+      defined(HANDLE_SFRAME_ABI))
+#error "Missing HANDLE_SFRAME definition"
+#endif
+
+#ifndef HANDLE_SFRAME_VERSION
+#define HANDLE_SFRAME_VERSION(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_FLAG
+#define HANDLE_SFRAME_FLAG(CODE, NAME)
+#endif
+
+#ifndef HANDLE_SFRAME_ABI
+#define HANDLE_SFRAME_ABI(CODE, NAME)
+#endif
+
+HANDLE_SFRAME_VERSION(0x01, V1)
+HANDLE_SFRAME_VERSION(0x02, V2)
+
+HANDLE_SFRAME_FLAG(0x01, FDESorted)
+HANDLE_SFRAME_FLAG(0x02, FramePointer)
+HANDLE_SFRAME_FLAG(0x04, FDEFuncStartPCRel)
+
+HANDLE_SFRAME_ABI(0x01, AArch64EndianBig)
+HANDLE_SFRAME_ABI(0x02, AArch64EndianLittle)
+HANDLE_SFRAME_ABI(0x03, AMD64EndianLittle)
+
+#undef HANDLE_SFRAME_VERSION
+#undef HANDLE_SFRAME_FLAG
+#undef HANDLE_SFRAME_ABI
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 31f1197b..da82904 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -700,18 +700,19 @@ public:
   /// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant,
   /// return an expression that implements it by multiplying by a magic number.
   /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-  MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const;
+  MachineInstr *buildUDivOrURemUsingMul(MachineInstr &MI) const;
   /// Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
-  bool matchUDivorURemByConst(MachineInstr &MI) const;
-  void applyUDivorURemByConst(MachineInstr &MI) const;
-
-  /// Given an G_SDIV \p MI expressing a signed divide by constant, return an
-  /// expression that implements it by multiplying by a magic number.
-  /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-  MachineInstr *buildSDivUsingMul(MachineInstr &MI) const;
-  /// Combine G_SDIV by constant into a multiply by magic constant.
-  bool matchSDivByConst(MachineInstr &MI) const;
-  void applySDivByConst(MachineInstr &MI) const;
+  bool matchUDivOrURemByConst(MachineInstr &MI) const;
+  void applyUDivOrURemByConst(MachineInstr &MI) const;
+
+  /// Given an G_SDIV \p MI or G_SREM \p MI expressing a signed divide by
+  /// constant, return an expression that implements it by multiplying by a
+  /// magic number. Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's
+  /// Guide".
+  MachineInstr *buildSDivOrSRemUsingMul(MachineInstr &MI) const;
+  /// Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
+  bool matchSDivOrSRemByConst(MachineInstr &MI) const;
+  void applySDivOrSRemByConst(MachineInstr &MI) const;
 
   /// Given an G_SDIV \p MI expressing a signed divided by a pow2 constant,
   /// return expressions that implements it by shifting.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index da73238..490d1a3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -103,6 +103,20 @@ public:
   /// \return The known alignment for the pointer-like value \p R.
   Align computeKnownAlignment(Register R, unsigned Depth = 0);
 
+  /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+  /// that are all less than the element bit-width of the shift node, return the
+  /// valid constant range.
+  std::optional<ConstantRange>
+  getValidShiftAmountRange(Register R, const APInt &DemandedElts,
+                           unsigned Depth);
+
+  /// If a G_SHL/G_ASHR/G_LSHR node with shift operand \p R has shift amounts
+  /// that are all less than the element bit-width of the shift node, return the
+  /// minimum possible value.
+  std::optional<uint64_t> getValidMinimumShiftAmount(Register R,
+                                                     const APInt &DemandedElts,
+                                                     unsigned Depth = 0);
+
   /// Determine which floating-point classes are valid for \p V, and return them
   /// in KnownFPClass bit sets.
   ///
diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index aef9190..7940752 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -39,8 +39,8 @@ class MachineBlockFrequencyInfo {
 public:
   LLVM_ABI MachineBlockFrequencyInfo(); // Legacy pass manager only.
   LLVM_ABI explicit MachineBlockFrequencyInfo(
-      MachineFunction &F, MachineBranchProbabilityInfo &MBPI,
-      MachineLoopInfo &MLI);
+      const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+      const MachineLoopInfo &MLI);
   LLVM_ABI MachineBlockFrequencyInfo(MachineBlockFrequencyInfo &&);
   LLVM_ABI ~MachineBlockFrequencyInfo();
 
diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 98a60c9..1d954cf 100644
--- a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -46,6 +46,11 @@ public:
   LLVM_ABI Result run(Function &F, FunctionAnalysisManager &FAM);
 };
 
+class FreeMachineFunctionPass : public PassInfoMixin<FreeMachineFunctionPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MachineFunctionAnalysis
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 72594c7..1a548a5 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3201,11 +3201,15 @@ public:
   /// Lower an interleaved load to target specific intrinsics. Return
   /// true on success.
   ///
-  /// \p LI is the vector load instruction.
+  /// \p Load is the vector load instruction. Can be either a plain load
+  /// instruction or a vp.load intrinsic.
+  /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+  /// component being interwoven) mask.  Can be nullptr, in which case the
+  /// result is uncondiitional.
   /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
   /// \p Indices is the corresponding indices for each shufflevector.
   /// \p Factor is the interleave factor.
-  virtual bool lowerInterleavedLoad(LoadInst *LI,
+  virtual bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
                                     ArrayRef<ShuffleVectorInst *> Shuffles,
                                     ArrayRef<unsigned> Indices,
                                     unsigned Factor) const {
@@ -3223,17 +3227,6 @@ public:
     return false;
   }
 
-  /// Lower an interleaved load to target specific intrinsics. Return
-  /// true on success.
-  ///
-  /// \p Load is a vp.load instruction.
-  /// \p Mask is a mask value
-  /// \p DeinterleaveRes is a list of deinterleaved results.
-  virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
-                                      ArrayRef<Value *> DeinterleaveRes) const {
-    return false;
-  }
-
   /// Lower an interleaved store to target specific intrinsics. Return
   /// true on success.
   ///
@@ -3251,10 +3244,9 @@ public:
   ///
   /// \p Load is the accompanying load instruction.  Can be either a plain load
   /// instruction or a vp.load intrinsic.
-  /// \p DeinterleaveValues contains the deinterleaved values.
-  virtual bool
-  lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
-                                   ArrayRef<Value *> DeinterleaveValues) const {
+  /// \p DI represents the deinterleaveN intrinsic.
+  virtual bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
+                                                IntrinsicInst *DI) const {
     return false;
   }
 
@@ -3262,10 +3254,14 @@ public:
   /// Return true on success. Currently only supports
   /// llvm.vector.interleave{2,3,5,7}
   ///
-  /// \p SI is the accompanying store instruction
+  /// \p Store is the accompanying store instruction.  Can be either a plain
+  /// store or a vp.store intrinsic.
+  /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+  /// component being interwoven) mask.  Can be nullptr, in which case the
+  /// result is uncondiitional.
   /// \p InterleaveValues contains the interleaved values.
   virtual bool
-  lowerInterleaveIntrinsicToStore(StoreInst *SI,
+  lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask,
                                   ArrayRef<Value *> InterleaveValues) const {
     return false;
   }
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h
index d44c333..9d0a554 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h
@@ -51,13 +51,17 @@ static constexpr inline bool canHaveIterator(Clause C) {
 // Can clause C create a private copy of a variable.
 static constexpr inline bool isPrivatizingClause(Clause C) {
   switch (C) {
+  case OMPC_detach:
   case OMPC_firstprivate:
+  // TODO case OMPC_induction:
   case OMPC_in_reduction:
+  case OMPC_is_device_ptr:
   case OMPC_lastprivate:
   case OMPC_linear:
   case OMPC_private:
   case OMPC_reduction:
   case OMPC_task_reduction:
+  case OMPC_use_device_ptr:
     return true;
   default:
     return false;
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index d68491e..ef761eb 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -284,6 +284,9 @@ namespace CallingConv {
     RISCV_VLSCall_32768 = 122,
     RISCV_VLSCall_65536 = 123,
 
+    // Calling convention for AMDGPU whole wave functions.
+    AMDGPU_Gfx_WholeWave = 124,
+
     /// The highest possible ID. Must be some 2^k - 1.
     MaxID = 1023
   };
@@ -294,8 +297,13 @@ namespace CallingConv {
 /// directly or indirectly via a call-like instruction.
 constexpr bool isCallableCC(CallingConv::ID CC) {
   switch (CC) {
+  // Called with special intrinsics:
+  // llvm.amdgcn.cs.chain
   case CallingConv::AMDGPU_CS_Chain:
   case CallingConv::AMDGPU_CS_ChainPreserve:
+  // llvm.amdgcn.call.whole.wave
+  case CallingConv::AMDGPU_Gfx_WholeWave:
+  // Hardware entry points:
   case CallingConv::AMDGPU_CS:
   case CallingConv::AMDGPU_ES:
   case CallingConv::AMDGPU_GS:
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index 77cee87..c529a86 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -39,30 +39,26 @@ class DbgVariableRecord;
 class Instruction;
 class Module;
 
-/// Finds dbg.declare intrinsics declaring local variables as living in the
+/// Finds dbg.declare records declaring local variables as living in the
 /// memory that 'V' points to.
-LLVM_ABI TinyPtrVector<DbgDeclareInst *> findDbgDeclares(Value *V);
-/// As above, for DVRDeclares.
 LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRDeclares(Value *V);
 /// As above, for DVRValues.
 LLVM_ABI TinyPtrVector<DbgVariableRecord *> findDVRValues(Value *V);
 
-/// Finds the llvm.dbg.value intrinsics describing a value.
-LLVM_ABI void findDbgValues(
-    SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V,
-    SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
-
-/// Finds the debug info intrinsics describing a value.
-LLVM_ABI void findDbgUsers(
-    SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V,
-    SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords = nullptr);
+/// Finds the debug info records describing a value.
+LLVM_ABI void
+findDbgUsers(Value *V,
+             SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
+/// Finds the dbg.values describing a value.
+LLVM_ABI void
+findDbgValues(Value *V,
+              SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
 
 /// Find subprogram that is enclosing this scope.
 LLVM_ABI DISubprogram *getDISubprogram(const MDNode *Scope);
 
 /// Produce a DebugLoc to use for each dbg.declare that is promoted to a
 /// dbg.value.
-LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII);
 LLVM_ABI DebugLoc getDebugValueLoc(DbgVariableRecord *DVR);
 
 /// Strip debug info in the module if it exists.
@@ -115,8 +111,7 @@ public:
   LLVM_ABI void processVariable(DILocalVariable *DVI);
   /// Process debug info location.
   LLVM_ABI void processLocation(const Module &M, const DILocation *Loc);
-  /// Process a DbgRecord (e.g, treat a DbgVariableRecord like a
-  /// DbgVariableIntrinsic).
+  /// Process a DbgRecord.
   LLVM_ABI void processDbgRecord(const Module &M, const DbgRecord &DR);
 
   /// Process subprogram.
@@ -193,13 +188,6 @@ using AssignmentInstRange =
 /// Iterators invalidated by adding or removing DIAssignID metadata to/from any
 /// instruction (including by deleting or cloning instructions).
 LLVM_ABI AssignmentInstRange getAssignmentInsts(DIAssignID *ID);
-/// Return a range of instructions (typically just one) that perform the
-/// assignment that \p DAI encodes.
-/// Iterators invalidated by adding or removing DIAssignID metadata to/from any
-/// instruction (including by deleting or cloning instructions).
-inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) {
-  return getAssignmentInsts(DAI->getAssignID());
-}
 
 inline AssignmentInstRange getAssignmentInsts(const DbgVariableRecord *DVR) {
   assert(DVR->isDbgAssign() &&
@@ -290,8 +278,6 @@ struct VarRecord {
   DILocalVariable *Var;
   DILocation *DL;
 
-  VarRecord(DbgVariableIntrinsic *DVI)
-      : Var(DVI->getVariable()), DL(getDebugValueLoc(DVI)) {}
   VarRecord(DbgVariableRecord *DVR)
       : Var(DVR->getVariable()), DL(getDebugValueLoc(DVR)) {}
   VarRecord(DILocalVariable *Var, DILocation *DL) : Var(Var), DL(DL) {}
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 9345f95..f1f0c189 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -66,7 +66,6 @@ namespace dwarf {
 enum Tag : uint16_t;
 }
 
-class DbgVariableIntrinsic;
 class DbgVariableRecord;
 
 LLVM_ABI extern cl::opt<bool> EnableFSDiscriminator;
@@ -4613,7 +4612,6 @@ class DebugVariable {
   LLVM_ABI static const FragmentInfo DefaultFragment;
 
 public:
-  LLVM_ABI DebugVariable(const DbgVariableIntrinsic *DII);
   LLVM_ABI DebugVariable(const DbgVariableRecord *DVR);
 
   DebugVariable(const DILocalVariable *Var,
@@ -4681,7 +4679,6 @@ template <> struct DenseMapInfo<DebugVariable> {
 /// information).
 class DebugVariableAggregate : public DebugVariable {
 public:
-  LLVM_ABI DebugVariableAggregate(const DbgVariableIntrinsic *DVI);
   DebugVariableAggregate(const DebugVariable &V)
       : DebugVariable(V.getVariable(), std::nullopt, V.getInlinedAt()) {}
 };
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index df572e8..90276ea 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -53,3 +53,4 @@ LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
 LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
 LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
 LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
+LLVM_FIXED_MD_KIND(MD_callee_type, "callee_type", 42)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d8fda0e..8bfa345 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3572,6 +3572,12 @@ def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">,
             [llvm_i32_ty, llvm_i32_ty],
             [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 
+def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">,
+  DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+
+def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">,
+  DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend
 // should emit calls to these.
@@ -3711,6 +3717,20 @@ class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> :
      IntrWillReturn, IntrNoCallback, IntrNoFree]
 >;
 
+class AMDGPUWmmaIntrinsicModsC_MatrixFMT :
+  Intrinsic<
+    [llvm_anyfloat_ty], // %D
+    [
+      llvm_i32_ty,      // matrix_a_fmt
+      llvm_anyint_ty,   // %A
+      llvm_i32_ty,      // matrix_b_fmt
+      llvm_anyint_ty,   // %B
+      llvm_i16_ty,      // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
+      LLVMMatchType<0>, // %C
+    ],
+    [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+>;
+
 defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = {
 def int_amdgcn_wmma_f32_16x16x4_f32       : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
 def int_amdgcn_wmma_f32_16x16x32_bf16     : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
@@ -3735,6 +3755,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint
 def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
 def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
 def int_amdgcn_wmma_i32_16x16x64_iu8      : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_f32_16x16x128_f8f6f4  : AMDGPUWmmaIntrinsicModsC_MatrixFMT;
 def int_amdgcn_wmma_f32_32x16x128_f4       : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>;
 }
 
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0375f29..967d166 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -331,6 +331,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
     !eq(gf,"m8n16:x2") : !listsplat(llvm_i32_ty, 2),
     !eq(gf,"m8n16:x4") : !listsplat(llvm_i32_ty, 4),
 
+    // stmatrix b8 -> s32 @ m16n8
+    !eq(gf,"m16n8:x1") : !listsplat(llvm_i32_ty, 1),
+    !eq(gf,"m16n8:x2") : !listsplat(llvm_i32_ty, 2),
+    !eq(gf,"m16n8:x4") : !listsplat(llvm_i32_ty, 4),
+
   );
 }
 
@@ -403,6 +408,17 @@ class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
                   !subst("llvm.", "int_", intr));
 }
 
+class STMATRIX_NAME<WMMA_REGS Frag, int Trans> {
+  string intr = "llvm.nvvm.stmatrix.sync.aligned"
+                # "." # Frag.geom
+                # "." # Frag.frag
+                # !if(Trans, ".trans", "")
+                # "." # Frag.ptx_elt_type
+                ;
+  string record = !subst(".", "_",
+                  !subst("llvm.", "int_", intr));
+}
+
 // Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
 //   Geom: list of supported geometries.
 //   TypeN: PTX type of the corresponding fragment's element.
@@ -443,6 +459,16 @@ class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
    list<string> ops = !foreach(x, ret, x.gft);
 }
 
+class STMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+  list<WMMA_REGS> ret =
+     !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+     !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+     !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+            [WMMA_REGS<geom, frag, type>]))))));
+   // Debugging aid for readable representation of the list above.
+   list<string> ops = !foreach(x, ret, x.gft);
+}
+
 // Creates list of valid combinations of fragments. This is the main list that
 // drives generation of corresponding intrinsics and instructions.
 class NVVM_MMA_OPS {
@@ -537,9 +563,18 @@ class NVVM_MMA_OPS {
   list<WMMA_REGS> ldmatrix_geom_m8n16_ops = LDMATRIX_OPS<
     ["m8n16"], ["x1", "x2", "x4"], ["b8x16.b6x16_p32", "b8x16.b4x16_p64"]>.ret;
 
+  list<WMMA_REGS> stmatrix_b16_ops = STMATRIX_OPS<
+    ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
+
+  list<WMMA_REGS> stmatrix_b8_ops = STMATRIX_OPS<
+    ["m16n8"], ["x1", "x2", "x4"], ["b8"]>.ret;
+
   list<WMMA_REGS> all_ldmatrix_ops = !listconcat(ldmatrix_b16_ops,
                                                  ldmatrix_geom_m16n16_ops,
                                                  ldmatrix_geom_m8n16_ops);
+
+  list<WMMA_REGS> all_stmatrix_ops = !listconcat(stmatrix_b16_ops,
+                                                 stmatrix_b8_ops);
 }
 
 def NVVM_MMA_OPS : NVVM_MMA_OPS;
@@ -680,6 +715,19 @@ class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
   );
 }
 
+// Returns true if the fragment is valid for stmatrix ops is supported;
+// false otherwise.
+class NVVM_STMATRIX_SUPPORTED<WMMA_REGS frag, bit trans> {
+  string g = frag.geom;
+  string t = frag.ptx_elt_type;
+
+  bit ret = !cond(
+    !and(!eq(g, "m8n8"), !eq(t, "b16")): true,
+    !and(!eq(g, "m16n8"), !eq(t, "b8"), !eq(trans, 1)): true,
+    true: false
+  );
+}
+
 class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
   string Suffix = !if(sync, "sync_", "")
                   # mode # "_"
@@ -1969,6 +2017,23 @@ foreach transposed = [0, 1] in {
   }
 }
 
+// STMATRIX
+class NVVM_STMATRIX<WMMA_REGS Frag, int Transposed>
+  : Intrinsic<[],
+          !listconcat([llvm_anyptr_ty], Frag.regs),
+          [IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
+           WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+          STMATRIX_NAME<Frag, Transposed>.intr>;
+
+foreach transposed = [0, 1] in {
+  foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in {
+    if NVVM_STMATRIX_SUPPORTED<frag, transposed>.ret then {
+      def STMATRIX_NAME<frag, transposed>.record
+        : NVVM_STMATRIX<frag, transposed>;
+    }
+  }
+}
+
 // MAPA
 let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in {
   def int_nvvm_mapa
@@ -2024,9 +2089,7 @@ foreach dim = 1...5 in {
                       tensor_dim_args,      // actual tensor dims
                       [llvm_i64_ty]),       // cache_hint
           [llvm_i1_ty],                     // Flag for cache_hint
-          [IntrConvergent,
-           ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
-           NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+          [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
 
     // Intrinsics for TMA Copy with reduction
     foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in
@@ -2037,18 +2100,31 @@ foreach dim = 1...5 in {
                          tensor_dim_args,     // actual tensor dims
                         [llvm_i64_ty]),       // cache_hint
           [llvm_i1_ty],                       // Flag for cache_hint
-          [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
-           NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>]>;
+          [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
   }
 }
 
+// TMA S2G tile::scatter4
+def int_nvvm_cp_async_bulk_tensor_s2g_tile_scatter4_2d
+  : DefaultAttrsIntrinsicFlags<[],
+      !listconcat([llvm_shared_ptr_ty,        // src_smem_ptr
+                   llvm_ptr_ty],              // tensormap_ptr
+                  !listsplat(llvm_i32_ty, 5), // dims
+                  [llvm_i64_ty]),             // cache_hint
+      [llvm_i1_ty],                           // Flag for cache_hint
+      [IntrConvergent, ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
 // TMA Tensor Copy Intrinsics: G2S -> From Global to Shared memory variants
 foreach dim = 1...5 in {
   defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim);
 
-  foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
+  foreach mode = !if(!ge(dim, 3), ["tile", "im2col", "im2col_w", "im2col_w_128"], ["tile"]) in {
     defvar is_im2col = !eq(mode, "im2col");
-    defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0);
+    defvar is_im2colw = !or(!eq(mode, "im2col_w"), !eq(mode, "im2col_w_128"));
+
+    // For im2col_w/w128 modes, the num_offsets is always 2.
+    // For im2col mode, the num_offsets is (dim - 2).
+    defvar num_im2col_offsets = !if(is_im2colw, 2, !if(is_im2col, !add(dim, -2), 0));
     defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets);
 
     defvar g2s_params = !listconcat(
@@ -2079,11 +2155,60 @@ foreach dim = 1...5 in {
                        im2col_offsets_args, // im2col offsets
                       [llvm_i64_ty]),       // cache_hint
           [llvm_i1_ty],                     // Flag for cache_hint
-          [IntrConvergent,
-           ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+          [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
+    def int_nvvm_cp_async_bulk_tensor_g2s_cta_ # mode # _ # dim # d :
+      DefaultAttrsIntrinsicFlags<[],
+          !listconcat([llvm_shared_ptr_ty,  // dst_ptr
+                       llvm_shared_ptr_ty,  // mbarrier_ptr
+                       llvm_ptr_ty],        // tensormap_ptr
+                       tensor_dim_args,     // actual tensor dims
+                       im2col_offsets_args, // im2col offsets
+                       [llvm_i64_ty]),      // cache_hint
+          [llvm_i1_ty],                     // Flag for cache_hint
+          [IntrConvergent, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
   }
 }
 
+// TMA copy for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_g2s_tile_gather4_2d
+  : DefaultAttrsIntrinsicFlags<[],
+      !listconcat(
+      [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr
+       llvm_shared_ptr_ty,         // mbarrier_ptr
+       llvm_ptr_ty],               // tensormap_ptr
+       !listsplat(llvm_i32_ty, 5), // co-ordinates
+      [llvm_i16_ty,                // cta_mask
+       llvm_i64_ty]),              // cache_hint
+      [llvm_i1_ty,                 // Flag for cta_mask
+       llvm_i1_ty,                 // Flag for cache_hint
+       llvm_i32_ty],               // Flag for cta_group
+      [IntrConvergent,
+       WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
+       // Allowed values for cta_group are {0,1,2} i.e [0, 3).
+       Range<ArgIndex<12>, 0, 3>]>;
+
+def int_nvvm_cp_async_bulk_tensor_g2s_cta_tile_gather4_2d
+  : DefaultAttrsIntrinsicFlags<[],
+      !listconcat(
+      [llvm_shared_ptr_ty,         // dst_shared_ptr
+       llvm_shared_ptr_ty,         // mbarrier_ptr
+       llvm_ptr_ty],               // tensormap_ptr
+       !listsplat(llvm_i32_ty, 5), // co-ordinates
+      [llvm_i64_ty]),              // cache_hint
+      [llvm_i1_ty],                // Flag for cache_hint
+      [IntrConvergent,
+       WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>]>;
+
+// TMA prefetch for tile::gather4
+def int_nvvm_cp_async_bulk_tensor_prefetch_tile_gather4_2d
+  : DefaultAttrsIntrinsicFlags<[],
+      !listconcat([llvm_ptr_ty],               // tensormap_ptr
+                  !listsplat(llvm_i32_ty, 5),  // co-ordinates
+                  [llvm_i64_ty]),              // cache_hint
+      [llvm_i1_ty],                            // Flag for cache_hint
+      [IntrConvergent, ReadOnly<ArgIndex<0>>]>;
+
 // Intrinsics for Prefetch and Prefetchu
 let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>] in {
   foreach level = ["L1", "L2"] in {
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 35c9cd6..b5f0cdf 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -90,7 +90,12 @@ let TargetPrefix = "spv" in {
   def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
   def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
   def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
-  def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+  def int_spv_refract
+      : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+                              [llvm_anyfloat_ty, LLVMMatchType<0>,
+                              llvm_anyfloat_ty],
+                              [IntrNoMem]>;
+def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
   def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 2de26c0..af252aa 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -1255,6 +1255,13 @@ public:
   bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); }
   bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; }
 
+  /// Check if this is a valid generalized type metadata node.
+  bool hasGeneralizedMDString() {
+    if (getNumOperands() < 2 || !isa<MDString>(getOperand(1)))
+      return false;
+    return cast<MDString>(getOperand(1))->getString().ends_with(".generalized");
+  }
+
   unsigned getNumTemporaryUses() const {
     assert(isTemporary() && "Only for temporaries");
     return Context.getReplaceableUses()->getNumUses();
@@ -1467,6 +1474,8 @@ public:
                                                 const Instruction *BInstr);
   LLVM_ABI static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
   LLVM_ABI static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
+  LLVM_ABI static MDNode *getMergedCalleeTypeMetadata(const MDNode *A,
+                                                      const MDNode *B);
 };
 
 /// Tuple of metadata.
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 737610b..0fd5de3 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
-  return false;
 }
 
 inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
@@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
   }
   llvm_unreachable(
       "Checking invalid f2i/d2i intrinsic for signed int conversion");
-  return false;
 }
 
 inline APFloat::roundingMode
@@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
     return APFloat::rmTowardZero;
   }
   llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
-  return APFloat::roundingMode::Invalid;
 }
 
 inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
@@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
-  return false;
 }
 
 inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
@@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
-  return false;
 }
 
 inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
@@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
-  return false;
+}
+
+inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_ceil_ftz_f:
+  case Intrinsic::nvvm_fabs_ftz:
+  case Intrinsic::nvvm_floor_ftz_f:
+  case Intrinsic::nvvm_round_ftz_f:
+  case Intrinsic::nvvm_saturate_ftz_f:
+  case Intrinsic::nvvm_sqrt_rn_ftz_f:
+    return true;
+  case Intrinsic::nvvm_ceil_f:
+  case Intrinsic::nvvm_ceil_d:
+  case Intrinsic::nvvm_fabs:
+  case Intrinsic::nvvm_floor_f:
+  case Intrinsic::nvvm_floor_d:
+  case Intrinsic::nvvm_round_f:
+  case Intrinsic::nvvm_round_d:
+  case Intrinsic::nvvm_saturate_d:
+  case Intrinsic::nvvm_saturate_f:
+  case Intrinsic::nvvm_sqrt_f:
+  case Intrinsic::nvvm_sqrt_rn_d:
+  case Intrinsic::nvvm_sqrt_rn_f:
+    return false;
+  }
+  llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
+}
+
+inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+    return true;
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_f:
+    return false;
+  }
+  llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
+}
+
+inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+    return APFloat::rmTowardNegative;
+
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+    return APFloat::rmNearestTiesToEven;
+
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+    return APFloat::rmTowardPositive;
+
+  case Intrinsic::nvvm_rcp_rz_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+    return APFloat::rmTowardZero;
+  }
+  llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
+}
+
+inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) {
+  if (ShouldFTZ)
+    return DenormalMode::getPreserveSign();
+  return DenormalMode::getIEEE();
 }
 
 } // namespace nvvm
diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h
index ea3c1de..d813ae9 100644
--- a/llvm/include/llvm/IR/OptBisect.h
+++ b/llvm/include/llvm/IR/OptBisect.h
@@ -15,6 +15,7 @@
 #define LLVM_IR_OPTBISECT_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Compiler.h"
 #include <limits>
 
@@ -82,8 +83,38 @@ private:
   mutable int LastBisectNum = 0;
 };
 
-/// Singleton instance of the OptBisect class, so multiple pass managers don't
-/// need to coordinate their uses of OptBisect.
+/// This class implements a mechanism to disable passes and individual
+/// optimizations at compile time based on a command line option
+/// (-opt-disable) in order to study how single transformations, or
+/// combinations thereof, affect the IR.
+class LLVM_ABI OptDisable : public OptPassGate {
+public:
+  /// Checks the pass name to determine if the specified pass should run.
+  ///
+  /// It returns true if the pass should run, i.e. if its name is was
+  /// not provided via command line.
+  /// If -opt-disable-enable-verbosity is given, the method prints the
+  /// name of the pass, and whether or not the pass will be executed.
+  ///
+  /// Most passes should not call this routine directly. Instead, it is called
+  /// through helper routines provided by the base classes of the pass. For
+  /// instance, function passes should call FunctionPass::skipFunction().
+  bool shouldRunPass(StringRef PassName,
+                     StringRef IRDescription) const override;
+
+  /// Parses the command line argument to extract the names of the passes
+  /// to be disabled. Multiple pass names can be provided with comma separation.
+  void setDisabled(StringRef Pass);
+
+  /// isEnabled() should return true before calling shouldRunPass().
+  bool isEnabled() const override { return !DisabledPasses.empty(); }
+
+private:
+  StringSet<> DisabledPasses = {};
+};
+
+/// Singleton instance of the OptPassGate class, so multiple pass managers don't
+/// need to coordinate their uses of OptBisect and OptDisable.
 LLVM_ABI OptPassGate &getGlobalPassGate();
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index 4f44ae5..ea8226c 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -491,6 +491,22 @@ public:
   /// invalidate them, unless they are preserved by the PreservedAnalyses set.
   void invalidate(IRUnitT &IR, const PreservedAnalyses &PA);
 
+  /// Directly clear a cached analysis for an IR unit.
+  ///
+  /// Using invalidate() over this is preferred unless you are really
+  /// sure you want to *only* clear this analysis without asking if it is
+  /// invalid.
+  template <typename AnalysisT> void clearAnalysis(IRUnitT &IR) {
+    AnalysisResultListT &ResultsList = AnalysisResultLists[&IR];
+    AnalysisKey *ID = AnalysisT::ID();
+
+    auto I =
+        llvm::find_if(ResultsList, [&ID](auto &E) { return E.first == ID; });
+    assert(I != ResultsList.end() && "Analysis must be available");
+    ResultsList.erase(I);
+    AnalysisResults.erase({ID, &IR});
+  }
+
 private:
   /// Look up a registered analysis pass.
   PassConceptT &lookUpPass(AnalysisKey *ID) {
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index 8058c8a..89ad4e5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -132,6 +132,10 @@ struct RuntimeLibcallsInfo {
     return ImplToLibcall[Impl];
   }
 
+  /// Check if this is valid libcall for the current module, otherwise
+  /// RTLIB::Unsupported.
+  RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const;
+
 private:
   static const RTLIB::LibcallImpl
       DefaultLibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1];
@@ -156,6 +160,14 @@ private:
   /// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind.
   LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls];
 
+  /// Check if a function name is a recognized runtime call of any kind. This
+  /// does not consider if this call is available for any current compilation,
+  /// just that it is a known call somewhere. This returns the set of all
+  /// LibcallImpls which match the name; multiple implementations with the same
+  /// name may exist but differ in interpretation based on the target context.
+  LLVM_ABI static iterator_range<ArrayRef<uint16_t>::const_iterator>
+  getRecognizedLibcallImpls(StringRef FuncName);
+
   static bool darwinHasSinCosStret(const Triple &TT) {
     if (!TT.isOSDarwin())
       return false;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 11926d4..f0297cd 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -2129,7 +2129,7 @@ defvar X86CommonLibcalls =
 );
 
 defvar Windows32DivRemMulCalls =
-  LibcallImpls<(add WindowsDivRemMulLibcalls),
+  LibcallsWithCC<(add WindowsDivRemMulLibcalls), X86_STDCALL,
   RuntimeLibcallPredicate<"TT.isWindowsMSVCEnvironment() || TT.isWindowsItaniumEnvironment()">>;
 
 def X86_32SystemLibrary
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 0322cbe..bfc1175 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -18,9 +18,7 @@
 
 namespace llvm {
 
-class MCAlignFragment;
 class MCFragment;
-class MCLEBFragment;
 class MCSymbol;
 class MCAssembler;
 class MCContext;
@@ -60,6 +58,9 @@ protected: // Can only create subclasses.
 
   MCAssembler *Asm = nullptr;
 
+  bool AllowAutoPadding = false;
+  bool AllowEnhancedRelaxation = false;
+
 public:
   MCAsmBackend(const MCAsmBackend &) = delete;
   MCAsmBackend &operator=(const MCAsmBackend &) = delete;
@@ -73,11 +74,11 @@ public:
 
   /// Return true if this target might automatically pad instructions and thus
   /// need to emit padding enable/disable directives around sensative code.
-  virtual bool allowAutoPadding() const { return false; }
+  bool allowAutoPadding() const { return AllowAutoPadding; }
   /// Return true if this target allows an unrelaxable instruction to be
   /// emitted into RelaxableFragment and then we can increase its size in a
   /// tricky way for optimization.
-  virtual bool allowEnhancedRelaxation() const { return false; }
+  bool allowEnhancedRelaxation() const { return AllowEnhancedRelaxation; }
 
   /// lifetime management
   virtual void reset() {}
@@ -105,21 +106,6 @@ public:
   /// Get information on a fixup kind.
   virtual MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const;
 
-  /// Hook to check if extra nop bytes must be inserted for alignment directive.
-  /// For some targets this may be necessary in order to support linker
-  /// relaxation. The number of bytes to insert are returned in Size.
-  virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
-                                                     unsigned &Size) {
-    return false;
-  }
-
-  /// Hook which indicates if the target requires a fixup to be generated when
-  /// handling an align directive in an executable section
-  virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
-                                             MCAlignFragment &AF) {
-    return false;
-  }
-
   // Evaluate a fixup, returning std::nullopt to use default handling for
   // `Value` and `IsResolved`. Otherwise, returns `IsResolved` with the
   // expectation that the hook updates `Value`.
@@ -177,6 +163,10 @@ public:
   }
 
   // Defined by linker relaxation targets.
+
+  // Return false to use default handling. Otherwise, set `Size` to the number
+  // of padding bytes.
+  virtual bool relaxAlign(MCFragment &F, unsigned &Size) { return false; }
   virtual bool relaxDwarfLineAddr(MCFragment &, bool &WasRelaxed) const {
     return false;
   }
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index aa396ef..467ad4e 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -36,8 +36,6 @@ class MCCVDefRangeFragment;
 class MCCVInlineLineTableFragment;
 class MCFragment;
 class MCFixup;
-class MCLEBFragment;
-class MCPseudoProbeAddrFragment;
 class MCSymbolRefExpr;
 class raw_ostream;
 class MCAsmBackend;
@@ -69,6 +67,13 @@ private:
 
   SmallVector<const MCSymbol *, 0> Symbols;
 
+  struct RelocDirective {
+    const MCExpr &Offset;
+    const MCExpr *Expr;
+    uint32_t Kind;
+  };
+  SmallVector<RelocDirective, 0> relocDirectives;
+
   mutable SmallVector<std::pair<SMLoc, std::string>, 0> PendingErrors;
 
   MCDwarfLineTableParams LTParams;
@@ -116,7 +121,6 @@ private:
   bool relaxCVInlineLineTable(MCCVInlineLineTableFragment &DF);
   bool relaxCVDefRange(MCCVDefRangeFragment &DF);
   bool relaxFill(MCFillFragment &F);
-  bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF);
 
 public:
   /// Construct a new assembler instance.
@@ -205,6 +209,7 @@ public:
 
   LLVM_ABI bool registerSection(MCSection &Section);
   LLVM_ABI bool registerSymbol(const MCSymbol &Symbol);
+  void addRelocDirective(RelocDirective RD);
 
   LLVM_ABI void reportError(SMLoc L, const Twine &Msg) const;
   // Record pending errors during layout iteration, as they may go away once the
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 3a7ca1a..cae2fbc 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -136,6 +136,18 @@ public:
                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
                                       raw_ostream &CStream) const = 0;
 
+  /// Returns the disassembly of an instruction bundle for VLIW architectures
+  /// like Hexagon.
+  ///
+  /// \param Instr    - An MCInst to populate with the contents of
+  /// the Bundle with sub-instructions encoded as Inst operands.
+  virtual DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
+                                            ArrayRef<uint8_t> Bytes,
+                                            uint64_t Address,
+                                            raw_ostream &CStream) const {
+    return Fail;
+  }
+
   /// Used to perform separate target specific disassembly for a particular
   /// symbol. May parse any prelude that precedes instructions after the
   /// start of a symbol, or the entire symbol.
diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h
index ad0961c..144f6bc 100644
--- a/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/llvm/include/llvm/MC/MCELFStreamer.h
@@ -141,7 +141,8 @@ public:
   }
 
 private:
-  void finalizeCGProfileEntry(const MCSymbolRefExpr *&S, uint64_t Offset);
+  void finalizeCGProfileEntry(const MCSymbolRefExpr *Sym, uint64_t Offset,
+                              const MCSymbolRefExpr *&S);
   void finalizeCGProfile();
 
   bool SeenIdent = false;
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index e2a77b8..2ceeba2 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -40,14 +40,6 @@ class MCObjectStreamer : public MCStreamer {
   std::unique_ptr<MCAssembler> Assembler;
   bool EmitEHFrame;
   bool EmitDebugFrame;
-  struct PendingMCFixup {
-    const MCSymbol *Sym;
-    MCFixup Fixup;
-    MCFragment *DF;
-    PendingMCFixup(const MCSymbol *McSym, MCFragment *F, MCFixup McFixup)
-        : Sym(McSym), Fixup(McFixup), DF(F) {}
-  };
-  SmallVector<PendingMCFixup, 2> PendingFixups;
 
   struct PendingAssignment {
     MCSymbol *Symbol;
@@ -62,8 +54,6 @@ class MCObjectStreamer : public MCStreamer {
   void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &);
   void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
   void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
-  void emitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
-  void resolvePendingFixups();
 
 protected:
   MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
@@ -82,24 +72,6 @@ public:
   MCSymbol *emitCFILabel() override;
   void emitCFISections(bool EH, bool Debug) override;
 
-  void insert(MCFragment *F) {
-    auto *Sec = CurFrag->getParent();
-    F->setParent(Sec);
-    F->setLayoutOrder(CurFrag->getLayoutOrder() + 1);
-    CurFrag->Next = F;
-    CurFrag = F;
-    Sec->curFragList()->Tail = F;
-  }
-
-  /// Get a data fragment to write into, creating a new one if the current
-  /// fragment is not FT_Data.
-  /// Optionally a \p STI can be passed in so that a new fragment is created
-  /// if the Subtarget differs from the current fragment.
-  MCFragment *getOrCreateDataFragment(const MCSubtargetInfo *STI = nullptr);
-
-protected:
-  bool changeSectionImpl(MCSection *Section, uint32_t Subsection);
-
 public:
   void visitUsedSymbol(const MCSymbol &Sym) override;
 
@@ -108,6 +80,15 @@ public:
   /// \name MCStreamer Interface
   /// @{
 
+  // Add a fragment with a variable-size tail and start a new empty fragment.
+  void insert(MCFragment *F);
+
+  // Add a new fragment to the current section without a variable-size tail.
+  void newFragment();
+
+  void appendContents(size_t Num, char Elt);
+  void addFixup(const MCExpr *Value, MCFixupKind Kind);
+
   void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
   virtual void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment &F,
                               uint64_t Offset);
@@ -162,9 +143,8 @@ public:
   void emitCVStringTableDirective() override;
   void emitCVFileChecksumsDirective() override;
   void emitCVFileChecksumOffsetDirective(unsigned FileNo) override;
-  std::optional<std::pair<bool, std::string>>
-  emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr,
-                     SMLoc Loc, const MCSubtargetInfo &STI) override;
+  void emitRelocDirective(const MCExpr &Offset, StringRef Name,
+                          const MCExpr *Expr, SMLoc Loc = {}) override;
   using MCStreamer::emitFill;
   void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
                 SMLoc Loc = SMLoc()) override;
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 296fdd8..87a8349 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -91,8 +91,7 @@ private:
   bool IsRegistered : 1;
 
   bool IsText : 1;
-
-  bool IsVirtual : 1;
+  bool IsBss : 1;
 
   /// Whether the section contains linker-relaxable fragments. If true, the
   /// offset between two locations may not be fully resolved.
@@ -113,7 +112,7 @@ protected:
   StringRef Name;
   SectionVariant Variant;
 
-  MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual,
+  MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss,
             MCSymbol *Begin);
   // Protected non-virtual dtor prevents destroy through a base class pointer.
   ~MCSection() {}
@@ -178,9 +177,7 @@ public:
 
   /// Check whether this section is "virtual", that is has no actual object
   /// file contents.
-  bool isVirtualSection() const { return IsVirtual; }
-
-  virtual StringRef getVirtualSectionKind() const;
+  bool isBssSection() const { return IsBss; }
 };
 
 // Represents a contiguous piece of code or data within a section. Its size is
@@ -188,6 +185,7 @@ public:
 // destructors.
 class MCFragment {
   friend class MCAssembler;
+  friend class MCStreamer;
   friend class MCObjectStreamer;
   friend class MCSection;
 
@@ -206,7 +204,6 @@ public:
     FT_SymbolId,
     FT_CVInlineLines,
     FT_CVDefRange,
-    FT_PseudoProbe,
   };
 
 private:
@@ -234,11 +231,16 @@ protected:
   /// FT_Relaxable, x86-specific
   bool AllowAutoPadding : 1;
 
+  // Track content and fixups for the fixed-size part as fragments are
+  // appended to the section. The content remains immutable, except when
+  // modified by applyFixup.
   uint32_t ContentStart = 0;
   uint32_t ContentEnd = 0;
   uint32_t FixupStart = 0;
   uint32_t FixupEnd = 0;
 
+  // Track content and fixups for the optional variable-size tail part,
+  // typically modified during relaxation.
   uint32_t VarContentStart = 0;
   uint32_t VarContentEnd = 0;
   uint32_t VarFixupStart = 0;
@@ -255,6 +257,19 @@ protected:
       uint32_t OperandSize;
     } relax;
     struct {
+      // The alignment to ensure, in bytes.
+      Align Alignment;
+      // The size of the integer (in bytes) of \p Value.
+      uint8_t FillLen;
+      // If true, fill with target-specific nop instructions.
+      bool EmitNops;
+      // The maximum number of bytes to emit; if the alignment
+      // cannot be satisfied in this width then this fragment is ignored.
+      unsigned MaxBytesToEmit;
+      // Value to use for filling padding bytes.
+      int64_t Fill;
+    } align;
+    struct {
       // True if this is a sleb128, false if uleb128.
       bool IsSigned;
       // The value this fragment should contain.
@@ -283,10 +298,10 @@ public:
       return false;
     case MCFragment::FT_Relaxable:
     case MCFragment::FT_Data:
+    case MCFragment::FT_Align:
     case MCFragment::FT_Dwarf:
     case MCFragment::FT_DwarfFrame:
     case MCFragment::FT_LEB:
-    case MCFragment::FT_PseudoProbe:
     case MCFragment::FT_CVInlineLines:
     case MCFragment::FT_CVDefRange:
       return true;
@@ -328,9 +343,9 @@ public:
   bool getAllowAutoPadding() const { return AllowAutoPadding; }
   void setAllowAutoPadding(bool V) { AllowAutoPadding = V; }
 
-  // Content-related functions manage parent's storage using ContentStart and
+  //== Content-related functions manage parent's storage using ContentStart and
   // ContentSize.
-  void clearContents() { ContentEnd = ContentStart; }
+
   // Get a SmallVector reference. The caller should call doneAppending to update
   // `ContentEnd`.
   SmallVectorImpl<char> &getContentsForAppending() {
@@ -354,7 +369,6 @@ public:
     getContentsForAppending().append(Num, Elt);
     doneAppending();
   }
-  LLVM_ABI void setContents(ArrayRef<char> Contents);
   MutableArrayRef<char> getContents() {
     return MutableArrayRef(getParent()->ContentStorage)
         .slice(ContentStart, ContentEnd - ContentStart);
@@ -386,7 +400,6 @@ public:
   void clearFixups() { FixupEnd = FixupStart; }
   LLVM_ABI void addFixup(MCFixup Fixup);
   LLVM_ABI void appendFixups(ArrayRef<MCFixup> Fixups);
-  LLVM_ABI void setFixups(ArrayRef<MCFixup> Fixups);
   MutableArrayRef<MCFixup> getFixups() {
     return MutableArrayRef(getParent()->FixupStorage)
         .slice(FixupStart, FixupEnd - FixupStart);
@@ -441,7 +454,45 @@ public:
     llvm::copy(Inst, S.begin() + u.relax.OperandStart);
   }
 
+  //== FT_Align functions
+  void makeAlign(Align Alignment, int64_t Fill, uint8_t FillLen,
+                 unsigned MaxBytesToEmit) {
+    Kind = FT_Align;
+    u.align.EmitNops = false;
+    u.align.Alignment = Alignment;
+    u.align.Fill = Fill;
+    u.align.FillLen = FillLen;
+    u.align.MaxBytesToEmit = MaxBytesToEmit;
+  }
+
+  Align getAlignment() const {
+    assert(Kind == FT_Align);
+    return u.align.Alignment;
+  }
+  int64_t getAlignFill() const {
+    assert(Kind == FT_Align);
+    return u.align.Fill;
+  }
+  uint8_t getAlignFillLen() const {
+    assert(Kind == FT_Align);
+    return u.align.FillLen;
+  }
+  unsigned getAlignMaxBytesToEmit() const {
+    assert(Kind == FT_Align);
+    return u.align.MaxBytesToEmit;
+  }
+  bool hasAlignEmitNops() const {
+    assert(Kind == FT_Align);
+    return u.align.EmitNops;
+  }
+
   //== FT_LEB functions
+  void makeLEB(bool IsSigned, const MCExpr *Value) {
+    assert(Kind == FT_Data);
+    Kind = MCFragment::FT_LEB;
+    u.leb.IsSigned = IsSigned;
+    u.leb.Value = Value;
+  }
   const MCExpr &getLEBValue() const {
     assert(Kind == FT_LEB);
     return *u.leb.Value;
@@ -454,10 +505,6 @@ public:
     assert(Kind == FT_LEB);
     return u.leb.IsSigned;
   }
-  void setLEBSigned(bool S) {
-    assert(Kind == FT_LEB);
-    u.leb.IsSigned = S;
-  }
 
   //== FT_DwarfFrame functions
   const MCExpr &getDwarfAddrDelta() const {
@@ -486,52 +533,6 @@ protected:
       : MCFragment(FType, HasInstructions) {}
 };
 
-class MCAlignFragment : public MCFragment {
-  /// Flag to indicate that (optimal) NOPs should be emitted instead
-  /// of using the provided value. The exact interpretation of this flag is
-  /// target dependent.
-  bool EmitNops : 1;
-
-  /// The alignment to ensure, in bytes.
-  Align Alignment;
-
-  /// The size of the integer (in bytes) of \p Value.
-  uint8_t FillLen;
-
-  /// The maximum number of bytes to emit; if the alignment
-  /// cannot be satisfied in this width then this fragment is ignored.
-  unsigned MaxBytesToEmit;
-
-  /// Value to use for filling padding bytes.
-  int64_t Fill;
-
-  /// When emitting Nops some subtargets have specific nop encodings.
-  const MCSubtargetInfo *STI = nullptr;
-
-public:
-  MCAlignFragment(Align Alignment, int64_t Fill, uint8_t FillLen,
-                  unsigned MaxBytesToEmit)
-      : MCFragment(FT_Align, false), EmitNops(false), Alignment(Alignment),
-        FillLen(FillLen), MaxBytesToEmit(MaxBytesToEmit), Fill(Fill) {}
-
-  Align getAlignment() const { return Alignment; }
-  int64_t getFill() const { return Fill; }
-  uint8_t getFillLen() const { return FillLen; }
-  unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
-
-  bool hasEmitNops() const { return EmitNops; }
-  void setEmitNops(bool Value, const MCSubtargetInfo *STI) {
-    EmitNops = Value;
-    this->STI = STI;
-  }
-
-  const MCSubtargetInfo *getSubtargetInfo() const { return STI; }
-
-  static bool classof(const MCFragment *F) {
-    return F->getKind() == MCFragment::FT_Align;
-  }
-};
-
 class MCFillFragment : public MCFragment {
   uint8_t ValueSize;
   /// Value to use for filling bytes.
@@ -729,22 +730,6 @@ public:
   }
 };
 
-class MCPseudoProbeAddrFragment : public MCEncodedFragment {
-  /// The expression for the difference of the two symbols that
-  /// make up the address delta between two .pseudoprobe directives.
-  const MCExpr *AddrDelta;
-
-public:
-  MCPseudoProbeAddrFragment(const MCExpr *AddrDelta)
-      : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {}
-
-  const MCExpr &getAddrDelta() const { return *AddrDelta; }
-
-  static bool classof(const MCFragment *F) {
-    return F->getKind() == MCFragment::FT_PseudoProbe;
-  }
-};
-
 inline MCSection::iterator &MCSection::iterator::operator++() {
   F = F->Next;
   return *this;
diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h
index 4472a12..f979413a 100644
--- a/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -82,7 +82,6 @@ public:
                             raw_ostream &OS,
                             uint32_t Subsection) const override;
   bool useCodeAlign() const override;
-  StringRef getVirtualSectionKind() const override;
 
   unsigned getOrAssignWinCFISectionID(unsigned *NextID) const {
     if (WinCFISectionID == ~0U)
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index f09d305..64a4daf 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -68,10 +68,6 @@ private:
       Group.getPointer()->setIsSignature();
   }
 
-  // TODO Delete after we stop supporting generation of GNU-style .zdebug_*
-  // sections.
-  void setSectionName(StringRef Name) { this->Name = Name; }
-
 public:
   /// Decides whether a '.section' directive should be printed before the
   /// section name
@@ -88,7 +84,6 @@ public:
                             raw_ostream &OS,
                             uint32_t Subsection) const override;
   bool useCodeAlign() const override;
-  StringRef getVirtualSectionKind() const override;
 
   bool isUnique() const { return UniqueID != NonUniqueID; }
   unsigned getUniqueID() const { return UniqueID; }
diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h
index 9e3f95e..b166397 100644
--- a/llvm/include/llvm/MC/MCSectionGOFF.h
+++ b/llvm/include/llvm/MC/MCSectionGOFF.h
@@ -111,7 +111,7 @@ public:
 
   // Returns the text style for a section. Only defined for ED and PR sections.
   GOFF::ESDTextStyle getTextStyle() const {
-    assert((isED() || isPR() || isVirtualSection()) && "Expect ED or PR section");
+    assert((isED() || isPR() || isBssSection()) && "Expect ED or PR section");
     if (isED())
       return EDAttributes.TextStyle;
     if (isPR())
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 1f7c8b5..4bfc8f9 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -259,6 +259,8 @@ class LLVM_ABI MCStreamer {
   bool AllowAutoPadding = false;
 
 protected:
+  bool IsObj = false;
+
   // Symbol of the current epilog for which we are processing SEH directives.
   WinEH::FrameInfo::Epilog *CurrentWinEpilog = nullptr;
 
@@ -270,6 +272,8 @@ protected:
   /// section changes.
   virtual void changeSection(MCSection *, uint32_t);
 
+  void addFragment(MCFragment *F);
+
   virtual void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
   virtual void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
 
@@ -308,6 +312,7 @@ public:
   virtual void reset();
 
   MCContext &getContext() const { return Context; }
+  bool isObj() const { return IsObj; }
 
   // MCObjectStreamer has an MCAssembler and allows more expression folding at
   // parse time.
@@ -425,11 +430,15 @@ public:
   }
 
   MCFragment *getCurrentFragment() const {
+    // Ensure consistency with the section stack.
     assert(!getCurrentSection().first ||
            CurFrag->getParent() == getCurrentSection().first);
+    // Ensure we eagerly allocate an empty fragment after adding fragment with a
+    // variable-size tail.
+    assert(!CurFrag || CurFrag->getKind() == MCFragment::FT_Data);
     return CurFrag;
   }
-
+  size_t getCurFragOffset() const { return getCurrentFragment()->Offset; }
   /// Save the current and previous section on the section stack.
   void pushSection() {
     SectionStack.push_back(
@@ -1048,13 +1057,9 @@ public:
 
   virtual void emitSyntaxDirective();
 
-  /// Record a relocation described by the .reloc directive. Return std::nullopt
-  /// if succeeded. Otherwise, return a pair (Name is invalid, error message).
-  virtual std::optional<std::pair<bool, std::string>>
-  emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr,
-                     SMLoc Loc, const MCSubtargetInfo &STI) {
-    return std::nullopt;
-  }
+  /// Record a relocation described by the .reloc directive.
+  virtual void emitRelocDirective(const MCExpr &Offset, StringRef Name,
+                                  const MCExpr *Expr, SMLoc Loc = {}) {}
 
   virtual void emitAddrsig() {}
   virtual void emitAddrsigSym(const MCSymbol *Sym) {}
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 1036868..a3aa0d9 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1312,7 +1312,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
     case ELF::EM_PPC:
       return (IsLittleEndian ? "elf32-powerpcle" : "elf32-powerpc");
     case ELF::EM_RISCV:
-      return "elf32-littleriscv";
+      return (IsLittleEndian ? "elf32-littleriscv" : "elf32-bigriscv");
     case ELF::EM_CSKY:
       return "elf32-csky";
     case ELF::EM_SPARC:
@@ -1338,7 +1338,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
     case ELF::EM_PPC64:
       return (IsLittleEndian ? "elf64-powerpcle" : "elf64-powerpc");
     case ELF::EM_RISCV:
-      return "elf64-littleriscv";
+      return (IsLittleEndian ? "elf64-littleriscv" : "elf64-bigriscv");
     case ELF::EM_S390:
       return "elf64-s390";
     case ELF::EM_SPARCV9:
@@ -1400,9 +1400,9 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
   case ELF::EM_RISCV:
     switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
     case ELF::ELFCLASS32:
-      return Triple::riscv32;
+      return IsLittleEndian ? Triple::riscv32 : Triple::riscv32be;
     case ELF::ELFCLASS64:
-      return Triple::riscv64;
+      return IsLittleEndian ? Triple::riscv64 : Triple::riscv64be;
     default:
       report_fatal_error("Invalid ELFCLASS!");
     }
diff --git a/llvm/include/llvm/Object/SFrameParser.h b/llvm/include/llvm/Object/SFrameParser.h
new file mode 100644
index 0000000..cf4fe20
--- /dev/null
+++ b/llvm/include/llvm/Object/SFrameParser.h
@@ -0,0 +1,48 @@
+//===- SFrameParser.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_SFRAME_H
+#define LLVM_OBJECT_SFRAME_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/SFrame.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+
+namespace llvm {
+namespace object {
+
+template <endianness E> class SFrameParser {
+public:
+  static Expected<SFrameParser> create(ArrayRef<uint8_t> Contents);
+
+  const sframe::Preamble<E> &getPreamble() const { return Header.Preamble; }
+  const sframe::Header<E> &getHeader() const { return Header; }
+
+  bool usesFixedRAOffset() const {
+    return getHeader().ABIArch == sframe::ABI::AMD64EndianLittle;
+  }
+  bool usesFixedFPOffset() const {
+    return false; // Not used in any currently defined ABI.
+  }
+
+private:
+  ArrayRef<uint8_t> Data;
+  const sframe::Header<E> &Header;
+
+  SFrameParser(ArrayRef<uint8_t> Data, const sframe::Header<E> &Header)
+      : Data(Data), Header(Header) {}
+};
+
+extern template class SFrameParser<endianness::big>;
+extern template class SFrameParser<endianness::little>;
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_SFRAME_H
diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h
index 2ecd47d..f3962c3 100644
--- a/llvm/include/llvm/Pass.h
+++ b/llvm/include/llvm/Pass.h
@@ -114,6 +114,10 @@ public:
   /// Registration templates, but can be overloaded directly.
   virtual StringRef getPassName() const;
 
+  /// Return a nice clean name for a pass
+  /// corresponding to that used to enable the pass in opt.
+  StringRef getPassArgument() const;
+
   /// getPassID - Return the PassID number that corresponds to this pass.
   AnalysisID getPassID() const {
     return PassID;
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a8176eb..b0360f1 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -281,7 +281,7 @@ protected:
 
       FunctionPassManager FPM;
       FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
-      FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
+      FPM.addPass(FreeMachineFunctionPass());
       if (this->PB.AddInCGSCCOrder) {
         MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
             createCGSCCToFunctionPassAdaptor(std::move(FPM))));
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 66051d7..fc81ab7 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1132,14 +1132,14 @@ def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
 def udiv_by_const : GICombineRule<
   (defs root:$root),
   (match (G_UDIV $dst, $x, $y):$root,
-   [{ return Helper.matchUDivorURemByConst(*${root}); }]),
-  (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
+   [{ return Helper.matchUDivOrURemByConst(*${root}); }]),
+  (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>;
 
 def sdiv_by_const : GICombineRule<
   (defs root:$root),
   (match (G_SDIV $dst, $x, $y):$root,
-   [{ return Helper.matchSDivByConst(*${root}); }]),
-  (apply [{ Helper.applySDivByConst(*${root}); }])>;
+   [{ return Helper.matchSDivOrSRemByConst(*${root}); }]),
+  (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>;
 
 def sdiv_by_pow2 : GICombineRule<
   (defs root:$root),
@@ -1159,10 +1159,16 @@ def intdiv_combines : GICombineGroup<[udiv_by_pow2, sdiv_by_pow2,
 def urem_by_const : GICombineRule<
   (defs root:$root),
   (match (G_UREM $dst, $x, $y):$root,
-   [{ return Helper.matchUDivorURemByConst(*${root}); }]),
-  (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
+   [{ return Helper.matchUDivOrURemByConst(*${root}); }]),
+  (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>;
 
-def intrem_combines : GICombineGroup<[urem_by_const]>;
+def srem_by_const : GICombineRule<
+  (defs root:$root),
+  (match (G_SREM $dst, $x, $y):$root,
+   [{ return Helper.matchSDivOrSRemByConst(*${root}); }]),
+  (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>;
+
+def intrem_combines : GICombineGroup<[urem_by_const, srem_by_const]>;
 
 def reassoc_ptradd : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 7577792..b65a63b 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -78,6 +78,8 @@ def : GINodeEquiv<G_XOR, xor>;
 def : GINodeEquiv<G_SHL, shl>;
 def : GINodeEquiv<G_LSHR, srl>;
 def : GINodeEquiv<G_ASHR, sra>;
+def : GINodeEquiv<G_ABDS, abds>;
+def : GINodeEquiv<G_ABDU, abdu>;
 def : GINodeEquiv<G_SADDSAT, saddsat>;
 def : GINodeEquiv<G_UADDSAT, uaddsat>;
 def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h
index be3d41e..40a9b6c 100644
--- a/llvm/include/llvm/TargetParser/Host.h
+++ b/llvm/include/llvm/TargetParser/Host.h
@@ -53,7 +53,7 @@ LLVM_ABI StringRef getHostCPUName();
 /// which features may appear in this map, except that they are all valid LLVM
 /// feature names. The map can be empty, for example if feature detection
 /// fails.
-LLVM_ABI const StringMap<bool, MallocAllocator> getHostCPUFeatures();
+LLVM_ABI StringMap<bool, MallocAllocator> getHostCPUFeatures();
 
 /// This is a function compatible with cl::AddExtraVersionPrinter, which adds
 /// info about the current target triple and detected CPU.
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 57d771b..670a632 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -49,62 +49,64 @@ public:
   enum ArchType {
     UnknownArch,
 
-    arm,            // ARM (little endian): arm, armv.*, xscale
-    armeb,          // ARM (big endian): armeb
-    aarch64,        // AArch64 (little endian): aarch64
-    aarch64_be,     // AArch64 (big endian): aarch64_be
-    aarch64_32,     // AArch64 (little endian) ILP32: aarch64_32
-    arc,            // ARC: Synopsys ARC
-    avr,            // AVR: Atmel AVR microcontroller
-    bpfel,          // eBPF or extended BPF or 64-bit BPF (little endian)
-    bpfeb,          // eBPF or extended BPF or 64-bit BPF (big endian)
-    csky,           // CSKY: csky
-    dxil,           // DXIL 32-bit DirectX bytecode
-    hexagon,        // Hexagon: hexagon
-    loongarch32,    // LoongArch (32-bit): loongarch32
-    loongarch64,    // LoongArch (64-bit): loongarch64
-    m68k,           // M68k: Motorola 680x0 family
-    mips,           // MIPS: mips, mipsallegrex, mipsr6
-    mipsel,         // MIPSEL: mipsel, mipsallegrexe, mipsr6el
-    mips64,         // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6
-    mips64el,       // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
-    msp430,         // MSP430: msp430
-    ppc,            // PPC: powerpc
-    ppcle,          // PPCLE: powerpc (little endian)
-    ppc64,          // PPC64: powerpc64, ppu
-    ppc64le,        // PPC64LE: powerpc64le
-    r600,           // R600: AMD GPUs HD2XXX - HD6XXX
-    amdgcn,         // AMDGCN: AMD GCN GPUs
-    riscv32,        // RISC-V (32-bit): riscv32
-    riscv64,        // RISC-V (64-bit): riscv64
-    sparc,          // Sparc: sparc
-    sparcv9,        // Sparcv9: Sparcv9
-    sparcel,        // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant
-    systemz,        // SystemZ: s390x
-    tce,            // TCE (http://tce.cs.tut.fi/): tce
-    tcele,          // TCE little endian (http://tce.cs.tut.fi/): tcele
-    thumb,          // Thumb (little endian): thumb, thumbv.*
-    thumbeb,        // Thumb (big endian): thumbeb
-    x86,            // X86: i[3-9]86
-    x86_64,         // X86-64: amd64, x86_64
-    xcore,          // XCore: xcore
-    xtensa,         // Tensilica: Xtensa
-    nvptx,          // NVPTX: 32-bit
-    nvptx64,        // NVPTX: 64-bit
-    amdil,          // AMDIL
-    amdil64,        // AMDIL with 64-bit pointers
-    hsail,          // AMD HSAIL
-    hsail64,        // AMD HSAIL with 64-bit pointers
-    spir,           // SPIR: standard portable IR for OpenCL 32-bit version
-    spir64,         // SPIR: standard portable IR for OpenCL 64-bit version
-    spirv,          // SPIR-V with logical memory layout.
-    spirv32,        // SPIR-V with 32-bit pointers
-    spirv64,        // SPIR-V with 64-bit pointers
-    kalimba,        // Kalimba: generic kalimba
-    shave,          // SHAVE: Movidius vector VLIW processors
-    lanai,          // Lanai: Lanai 32-bit
-    wasm32,         // WebAssembly with 32-bit pointers
-    wasm64,         // WebAssembly with 64-bit pointers
+    arm,         // ARM (little endian): arm, armv.*, xscale
+    armeb,       // ARM (big endian): armeb
+    aarch64,     // AArch64 (little endian): aarch64
+    aarch64_be,  // AArch64 (big endian): aarch64_be
+    aarch64_32,  // AArch64 (little endian) ILP32: aarch64_32
+    arc,         // ARC: Synopsys ARC
+    avr,         // AVR: Atmel AVR microcontroller
+    bpfel,       // eBPF or extended BPF or 64-bit BPF (little endian)
+    bpfeb,       // eBPF or extended BPF or 64-bit BPF (big endian)
+    csky,        // CSKY: csky
+    dxil,        // DXIL 32-bit DirectX bytecode
+    hexagon,     // Hexagon: hexagon
+    loongarch32, // LoongArch (32-bit): loongarch32
+    loongarch64, // LoongArch (64-bit): loongarch64
+    m68k,        // M68k: Motorola 680x0 family
+    mips,        // MIPS: mips, mipsallegrex, mipsr6
+    mipsel,      // MIPSEL: mipsel, mipsallegrexe, mipsr6el
+    mips64,      // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6
+    mips64el,    // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
+    msp430,      // MSP430: msp430
+    ppc,         // PPC: powerpc
+    ppcle,       // PPCLE: powerpc (little endian)
+    ppc64,       // PPC64: powerpc64, ppu
+    ppc64le,     // PPC64LE: powerpc64le
+    r600,        // R600: AMD GPUs HD2XXX - HD6XXX
+    amdgcn,      // AMDGCN: AMD GCN GPUs
+    riscv32,     // RISC-V (32-bit, little endian): riscv32
+    riscv64,     // RISC-V (64-bit, little endian): riscv64
+    riscv32be,   // RISC-V (32-bit, big endian): riscv32be
+    riscv64be,   // RISC-V (64-bit, big endian): riscv64be
+    sparc,       // Sparc: sparc
+    sparcv9,     // Sparcv9: Sparcv9
+    sparcel,     // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant
+    systemz,     // SystemZ: s390x
+    tce,         // TCE (http://tce.cs.tut.fi/): tce
+    tcele,       // TCE little endian (http://tce.cs.tut.fi/): tcele
+    thumb,       // Thumb (little endian): thumb, thumbv.*
+    thumbeb,     // Thumb (big endian): thumbeb
+    x86,         // X86: i[3-9]86
+    x86_64,      // X86-64: amd64, x86_64
+    xcore,       // XCore: xcore
+    xtensa,      // Tensilica: Xtensa
+    nvptx,       // NVPTX: 32-bit
+    nvptx64,     // NVPTX: 64-bit
+    amdil,       // AMDIL
+    amdil64,     // AMDIL with 64-bit pointers
+    hsail,       // AMD HSAIL
+    hsail64,     // AMD HSAIL with 64-bit pointers
+    spir,        // SPIR: standard portable IR for OpenCL 32-bit version
+    spir64,      // SPIR: standard portable IR for OpenCL 64-bit version
+    spirv,       // SPIR-V with logical memory layout.
+    spirv32,     // SPIR-V with 32-bit pointers
+    spirv64,     // SPIR-V with 64-bit pointers
+    kalimba,     // Kalimba: generic kalimba
+    shave,       // SHAVE: Movidius vector VLIW processors
+    lanai,       // Lanai: Lanai 32-bit
+    wasm32,      // WebAssembly with 32-bit pointers
+    wasm64,      // WebAssembly with 64-bit pointers
     renderscript32, // 32-bit RenderScript
     renderscript64, // 64-bit RenderScript
     ve,             // NEC SX-Aurora Vector Engine
@@ -1064,10 +1066,14 @@ public:
   }
 
   /// Tests whether the target is 32-bit RISC-V.
-  bool isRISCV32() const { return getArch() == Triple::riscv32; }
+  bool isRISCV32() const {
+    return getArch() == Triple::riscv32 || getArch() == Triple::riscv32be;
+  }
 
   /// Tests whether the target is 64-bit RISC-V.
-  bool isRISCV64() const { return getArch() == Triple::riscv64; }
+  bool isRISCV64() const {
+    return getArch() == Triple::riscv64 || getArch() == Triple::riscv64be;
+  }
 
   /// Tests whether the target is RISC-V (32- and 64-bit).
   bool isRISCV() const { return isRISCV32() || isRISCV64(); }
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index bb79d25..3f5f427 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -325,7 +325,6 @@ LLVM_ABI void salvageDebugInfo(Instruction &I);
 /// Mark undef if salvaging cannot be completed.
 LLVM_ABI void
 salvageDebugInfoForDbgValues(Instruction &I,
-                             ArrayRef<DbgVariableIntrinsic *> Insns,
                              ArrayRef<DbgVariableRecord *> DPInsns);
 
 /// Given an instruction \p I and DIExpression \p DIExpr operating on
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 4e5da81..9500b1f 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -29,7 +29,6 @@ template <typename T> class SSAUpdaterTraits;
 class Type;
 class Use;
 class Value;
-class DbgValueInst;
 
 /// Helper class for SSA formation on a set of values defined in
 /// multiple blocks.
@@ -123,8 +122,6 @@ public:
   /// value set to the new SSA value if available, and undef if not.
   void UpdateDebugValues(Instruction *I);
   void UpdateDebugValues(Instruction *I,
-                         SmallVectorImpl<DbgValueInst *> &DbgValues);
-  void UpdateDebugValues(Instruction *I,
                          SmallVectorImpl<DbgVariableRecord *> &DbgValues);
 
   /// Rewrite a use like \c RewriteUse but handling in-block definitions.
@@ -136,7 +133,6 @@ public:
 
 private:
   Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
-  void UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue);
   void UpdateDebugValue(Instruction *I, DbgVariableRecord *DbgValue);
 };
 
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index a101151..39fef92 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -530,6 +530,7 @@ private:
 
   bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
 
+  Value *tryToReuseLCSSAPhi(const SCEVAddRecExpr *S);
   Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
   PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                      const Loop *L, Type *&TruncTy,