Diffstat (limited to 'llvm/include/llvm')
38 files changed, 996 insertions, 534 deletions
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h index 88dbce9..a29aaa3 100644 --- a/llvm/include/llvm/ADT/Any.h +++ b/llvm/include/llvm/ADT/Any.h @@ -119,7 +119,6 @@ private: template <class T> friend T any_cast(Any &&Value); template <class T> friend const T *any_cast(const Any *Value); template <class T> friend T *any_cast(Any *Value); - template <typename T> friend bool any_isa(const Any &Value); std::unique_ptr<StorageBase> Storage; }; diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index 956dcbc..93c6bfb 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/HLSL/HLSLBinding.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/PassManager.h" @@ -633,86 +634,25 @@ LLVM_ABI ModulePass *createDXILResourceWrapperPassPass(); // register slots to resources with implicit bindings, and in a // post-optimization validation pass that will raise diagnostic about // overlapping bindings. -// -// For example for these resource bindings: -// -// RWBuffer<float> A[10] : register(u3); -// RWBuffer<float> B[] : register(u5, space2) -// -// The analysis result for UAV binding type will look like this: -// -// UAVSpaces { -// ResClass = ResourceClass::UAV, -// Spaces = { -// { Space = 0, FreeRanges = {{ 0, 2 }, { 13, UINT32_MAX }} }, -// { Space = 2, FreeRanges = {{ 0, 4 }} } -// } -// } -// class DXILResourceBindingInfo { -public: - struct BindingRange { - uint32_t LowerBound; - uint32_t UpperBound; - BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {} - }; - - struct RegisterSpace { - uint32_t Space; - SmallVector<BindingRange> FreeRanges; - RegisterSpace(uint32_t Space) : Space(Space) { - FreeRanges.emplace_back(0, UINT32_MAX); - } - // Size == -1 means unbounded array - LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size); - }; - - struct BindingSpaces { - dxil::ResourceClass RC; - llvm::SmallVector<RegisterSpace> Spaces; - BindingSpaces(dxil::ResourceClass RC) : RC(RC) {} - LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space); - }; - -private: - BindingSpaces SRVSpaces, UAVSpaces, CBufferSpaces, SamplerSpaces; - bool ImplicitBinding; - bool OverlappingBinding; + hlsl::BindingInfo Bindings; + bool HasImplicitBinding = false; + bool HasOverlappingBinding = false; // Populate the resource binding info given explicit resource binding calls // in the module. 
void populate(Module &M, DXILResourceTypeMap &DRTM); public: - DXILResourceBindingInfo() - : SRVSpaces(dxil::ResourceClass::SRV), - UAVSpaces(dxil::ResourceClass::UAV), - CBufferSpaces(dxil::ResourceClass::CBuffer), - SamplerSpaces(dxil::ResourceClass::Sampler), ImplicitBinding(false), - OverlappingBinding(false) {} - - bool hasImplicitBinding() const { return ImplicitBinding; } - void setHasImplicitBinding(bool Value) { ImplicitBinding = Value; } - bool hasOverlappingBinding() const { return OverlappingBinding; } - - BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) { - switch (RC) { - case dxil::ResourceClass::SRV: - return SRVSpaces; - case dxil::ResourceClass::UAV: - return UAVSpaces; - case dxil::ResourceClass::CBuffer: - return CBufferSpaces; - case dxil::ResourceClass::Sampler: - return SamplerSpaces; - } + bool hasImplicitBinding() const { return HasImplicitBinding; } + void setHasImplicitBinding(bool Value) { HasImplicitBinding = Value; } + bool hasOverlappingBinding() const { return HasOverlappingBinding; } + void setHasOverlappingBinding(bool Value) { HasOverlappingBinding = Value; } - llvm_unreachable("Invalid resource class"); - } - - // Size == -1 means unbounded array LLVM_ABI std::optional<uint32_t> - findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size); + findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size) { + return Bindings.findAvailableBinding(RC, Space, Size); + } friend class DXILResourceBindingAnalysis; friend class DXILResourceBindingWrapperPass; diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index af6e534..92304ed 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -180,10 +180,12 @@ public: const SmallVectorImpl<Instruction *> &Instrs) const; }; - MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, + MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, + DominatorTree *DT, const Loop *L, const DenseMap<Value *, const SCEV *> &SymbolicStrides, unsigned MaxTargetVectorWidthInBits) - : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), + : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), + SymbolicStrides(SymbolicStrides), MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} /// Register the location (instructions are given increasing numbers) @@ -288,6 +290,15 @@ public: return PointerBounds; } + DominatorTree *getDT() const { + assert(DT && "requested DT, but it is not available"); + return DT; + } + AssumptionCache *getAC() const { + assert(AC && "requested AC, but it is not available"); + return AC; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -296,6 +307,10 @@ private: /// example we might assume a unit stride for a pointer in order to prove /// that a memory access is strided and doesn't wrap. 
PredicatedScalarEvolution &PSE; + + AssumptionCache *AC; + DominatorTree *DT; + const Loop *InnermostLoop; /// Reference to map of pointer values to @@ -670,7 +685,7 @@ public: LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI, + DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial = false); /// Return true we can analyze the memory accesses in the loop and there are @@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap<std::pair<const SCEV *, Type *>, - std::pair<const SCEV *, const SCEV *>> *PointerBounds); + std::pair<const SCEV *, const SCEV *>> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC); class LoopAccessInfoManager { /// The cache. @@ -935,12 +951,13 @@ class LoopAccessInfoManager { LoopInfo &LI; TargetTransformInfo *TTI; const TargetLibraryInfo *TLI = nullptr; + AssumptionCache *AC; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, - const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} + const TargetLibraryInfo *TLI, AssumptionCache *AC) + : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {} LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h index bff7707..011d599 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h @@ -91,6 +91,10 @@ inline bind_ty<const SCEVUnknown> m_SCEVUnknown(const SCEVUnknown *&V) { return V; } +inline bind_ty<const SCEVAddExpr> m_scev_Add(const SCEVAddExpr *&V) { + return V; +} + /// Match a specified const SCEV *. struct specificscev_ty { const SCEV *Expr; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 7928835..aa4550d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1950,6 +1950,10 @@ public: const Function &F, SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const; + /// Returns true if GEP should not be used to index into vectors for this + /// target. + LLVM_ABI bool allowVectorElementIndexingUsingGEP() const; + private: std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl; }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2ea87b3..abdbca0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1145,6 +1145,8 @@ public: const Function &F, SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {} + virtual bool allowVectorElementIndexingUsingGEP() const { return true; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. 
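The LoopAccessAnalysis changes above thread an AssumptionCache and a DominatorTree through MemoryDepChecker, LoopAccessInfo, and LoopAccessInfoManager. As a hedged sketch of what call sites look like after this patch (the helper name and the new-pass-manager wiring are illustrative assumptions, not part of the diff), the manager is now constructed with the AssumptionCache as an extra trailing argument:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Sketch: construct a LoopAccessInfoManager under the new signature.
static LoopAccessInfoManager makeLAIManager(Function &F,
                                            FunctionAnalysisManager &FAM) {
  auto &SE  = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &AA  = FAM.getResult<AAManager>(F);
  auto &DT  = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &LI  = FAM.getResult<LoopAnalysis>(F);
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
  auto &AC  = FAM.getResult<AssumptionAnalysis>(F); // new trailing parameter
  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
}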
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b55c4e0..6781cd5 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -633,6 +633,9 @@ public: return true; } + /// Return true if this group is full, i.e. it has no gaps. + bool isFull() const { return getNumMembers() == getFactor(); } + private: uint32_t Factor; // Interleave Factor. bool Reverse; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index ad35d7f..749971e 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -973,7 +973,10 @@ enum : unsigned { // SM based processor values. EF_CUDA_SM100 = 0x6400, + EF_CUDA_SM101 = 0x6500, + EF_CUDA_SM103 = 0x6700, EF_CUDA_SM120 = 0x7800, + EF_CUDA_SM121 = 0x7900, // Set when using an accelerator variant like sm_100a. EF_CUDA_ACCELERATORS = 0x8, diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index 78f5eb4..5f53681 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -466,7 +466,7 @@ private: EmitCode(Abbrev); - unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); + unsigned i = 0, e = Abbv->getNumOperandInfos(); if (Code) { assert(e && "Expected non-empty abbreviation"); const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); @@ -632,8 +632,7 @@ private: void EncodeAbbrev(const BitCodeAbbrev &Abbv) { EmitCode(bitc::DEFINE_ABBREV); EmitVBR(Abbv.getNumOperandInfos(), 5); - for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos()); - i != e; ++i) { + for (unsigned i = 0, e = Abbv.getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i); Emit(Op.isLiteral(), 1); if (Op.isLiteral()) { diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index faab2503..91c0142 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -190,6 +190,36 @@ private: /// Emit comments in assembly output if this is true. bool VerboseAsm; + /// Store symbols and type identifiers used to create callgraph section + /// entries related to a function. + struct FunctionInfo { + /// Numeric type identifier used in callgraph section for indirect calls + /// and targets. + using CGTypeId = uint64_t; + + /// Enumeration of function kinds, and their mapping to function kind values + /// stored in callgraph section entries. + /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp. + enum class FunctionKind : uint64_t { + /// Function cannot be target to indirect calls. + NOT_INDIRECT_TARGET = 0, + + /// Function may be target to indirect calls but its type id is unknown. + INDIRECT_TARGET_UNKNOWN_TID = 1, + + /// Function may be target to indirect calls and its type id is known. + INDIRECT_TARGET_KNOWN_TID = 2, + }; + + /// Map type identifiers to callsite labels. Labels are generated for each + /// indirect callsite in the function. + SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels; + }; + + enum CallGraphSectionFormatVersion : uint64_t { + V_0 = 0, + }; + /// Output stream for the stack usage file (i.e., .su file). std::unique_ptr<raw_fd_ostream> StackUsageStream; @@ -355,6 +385,13 @@ public: /// are available. Returns empty string otherwise. 
StringRef getConstantSectionSuffix(const Constant *C) const; + /// Generate and emit labels for callees of the indirect callsites which will + /// be used to populate the .callgraph section. + void emitIndirectCalleeLabels( + FunctionInfo &FuncInfo, + const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, + const MachineInstr &MI); + //===------------------------------------------------------------------===// // XRay instrumentation implementation. //===------------------------------------------------------------------===// @@ -442,6 +479,8 @@ public: void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol); virtual void emitKCFITypeId(const MachineFunction &MF); + void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo); + void emitPseudoProbe(const MachineInstr &MI); void emitRemarksSection(remarks::RemarkStreamer &RS); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 938d71d..9e3d919 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -323,10 +323,11 @@ public: const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - /// Returns true if the original IR terminator is an `indirectbr`. This - /// typically corresponds to a `goto` in C, rather than jump tables. - bool terminatorIsComputedGoto() const { - return back().isIndirectBranch() && + /// Returns true if the original IR terminator is an `indirectbr` with + /// successor blocks. This typically corresponds to a `goto` in C, rather than + /// jump tables. + bool terminatorIsComputedGotoWithSuccessors() const { + return back().isIndirectBranch() && !succ_empty() && llvm::all_of(successors(), [](const MachineBasicBlock *Succ) { return Succ->isIRBlockAddressTaken(); }); diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 7f88323..06c4daf 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -517,6 +517,13 @@ public: SmallVector<ArgRegPair, 1> ArgRegPairs; /// Callee type ids. SmallVector<ConstantInt *, 4> CalleeTypeIds; + + CallSiteInfo() = default; + + /// Extracts the numeric type id from the CallBase's callee_type Metadata, + /// and sets CalleeTypeIds. This is used as type id for the indirect call in + /// the call graph section. + CallSiteInfo(const CallBase &CB); }; struct CalledGlobalInfo { @@ -524,11 +531,12 @@ public: unsigned TargetFlags; }; + using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>; + private: Delegate *TheDelegate = nullptr; GISelChangeObserver *Observer = nullptr; - using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>; /// Map a call instruction to call site arguments forwarding info. CallSiteInfoMap CallSitesInfo; diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index e63e77a..e705d7d9 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -69,6 +69,32 @@ enum { } // end namespace RegState +/// Set of metadata that should be preserved when using BuildMI(). This provides +/// a more convenient way of preserving DebugLoc, PCSections and MMRA. 
+class MIMetadata { +public: + MIMetadata() = default; + MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr) + : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {} + MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr, + MDNode *MMRA = nullptr) + : DL(DI), PCSections(PCSections), MMRA(MMRA) {} + explicit MIMetadata(const Instruction &From) + : DL(From.getDebugLoc()), + PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} + explicit MIMetadata(const MachineInstr &From) + : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} + + const DebugLoc &getDL() const { return DL; } + MDNode *getPCSections() const { return PCSections; } + MDNode *getMMRAMetadata() const { return MMRA; } + +private: + DebugLoc DL; + MDNode *PCSections = nullptr; + MDNode *MMRA = nullptr; +}; + class MachineInstrBuilder { MachineFunction *MF = nullptr; MachineInstr *MI = nullptr; @@ -317,15 +343,11 @@ public: } } - const MachineInstrBuilder &setPCSections(MDNode *MD) const { - if (MD) - MI->setPCSections(*MF, MD); - return *this; - } - - const MachineInstrBuilder &setMMRAMetadata(MDNode *MMRA) const { - if (MMRA) - MI->setMMRAMetadata(*MF, MMRA); + const MachineInstrBuilder ©MIMetadata(const MIMetadata &MIMD) const { + if (MIMD.getPCSections()) + MI->setPCSections(*MF, MIMD.getPCSections()); + if (MIMD.getMMRAMetadata()) + MI->setMMRAMetadata(*MF, MIMD.getMMRAMetadata()); return *this; } @@ -343,38 +365,11 @@ public: } }; -/// Set of metadata that should be preserved when using BuildMI(). This provides -/// a more convenient way of preserving DebugLoc, PCSections and MMRA. -class MIMetadata { -public: - MIMetadata() = default; - MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr, MDNode *MMRA = nullptr) - : DL(std::move(DL)), PCSections(PCSections), MMRA(MMRA) {} - MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr, - MDNode *MMRA = nullptr) - : DL(DI), PCSections(PCSections), MMRA(MMRA) {} - explicit MIMetadata(const Instruction &From) - : DL(From.getDebugLoc()), - PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} - explicit MIMetadata(const MachineInstr &From) - : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} - - const DebugLoc &getDL() const { return DL; } - MDNode *getPCSections() const { return PCSections; } - MDNode *getMMRAMetadata() const { return MMRA; } - -private: - DebugLoc DL; - MDNode *PCSections = nullptr; - MDNode *MMRA = nullptr; -}; - /// Builder interface. Specify how to create the initial instruction itself. 
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + .copyMIMetadata(MIMD); } /// This version of the builder sets up the first operand as a @@ -382,8 +377,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) + .copyMIMetadata(MIMD) .addReg(DestReg, RegState::Define); } @@ -397,10 +391,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) - .addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg( + DestReg, RegState::Define); } /// This version of the builder inserts the newly-built instruction before @@ -416,10 +408,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()) - .addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD).addReg( + DestReg, RegState::Define); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, @@ -449,9 +439,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -461,9 +449,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index efda7eb..5a2aee2 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1303,8 +1303,8 @@ protected: SchedBoundary Top; SchedBoundary Bot; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; /// Candidate last picked from Top boundary. SchedCandidate TopCand; @@ -1346,8 +1346,8 @@ protected: /// Candidate last picked from Bot boundary. 
SchedCandidate BotCand; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; public: PostGenericScheduler(const MachineSchedContext *C) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 2967532..be90250 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -578,6 +578,18 @@ m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) { return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx); } +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCC(const LTy &L, const RTy &R, const TTy &T, const FTy &F, + const CCTy &CC) { + return m_Node(ISD::SELECT_CC, L, R, T, F, CC); +} + +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, + const FTy &F, const CCTy &CC) { + return m_AnyOf(m_Select(m_SetCC(L, R, CC), T, F), m_SelectCC(L, R, T, F, CC)); +} + // === Binary operations === template <typename LHS_P, typename RHS_P, bool Commutable = false, bool ExcludeChain = false> diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index 3a0a31b..122b7be 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -240,6 +240,11 @@ class TargetRegisterInfo; typedef SmallSet<SUnit *, 8> ClusterInfo; constexpr unsigned InvalidClusterId = ~0u; + /// Return whether the input cluster ID's are the same and valid. + inline bool isTheSameCluster(unsigned A, unsigned B) { + return A != InvalidClusterId && A == B; + } + /// Scheduling unit. This is a node in the scheduling DAG. class SUnit { private: diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 8f88811..11ae8cd 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -182,7 +182,7 @@ public: return SDValue(Node, R); } - /// Return true if this node is an operand of N. + /// Return true if the referenced return value is an operand of N. LLVM_ABI bool isOperandOf(const SDNode *N) const; /// Return the ValueType of the referenced return value. 
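The new m_SelectCC and m_SelectCCLike matchers in SDPatternMatch.h let a combine handle the fused SELECT_CC node and the unfused SELECT-of-SETCC shape with a single pattern. A minimal sketch (the helper and the max-with-zero idiom are illustrative assumptions, not from this patch):

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// Sketch: recognize "x > 0 ? x : 0" whether the DAG holds it as
// SELECT(SETCC(x, 0, setgt), x, 0) or as a single SELECT_CC node.
static bool isSMaxWithZero(SDValue V) {
  SDValue X, T;
  return sd_match(V, m_SelectCCLike(m_Value(X), m_Zero(), m_Value(T),
                                    m_Zero(),
                                    m_SpecificCondCode(ISD::SETGT))) &&
         X == T;
}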
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 4551e7e..b06158d 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -92,258 +92,270 @@ def v1i1 : VTVec<1, i1, 17>; // 1 x i1 vector value def v2i1 : VTVec<2, i1, 18>; // 2 x i1 vector value def v3i1 : VTVec<3, i1, 19>; // 3 x i1 vector value def v4i1 : VTVec<4, i1, 20>; // 4 x i1 vector value -def v8i1 : VTVec<8, i1, 21>; // 8 x i1 vector value -def v16i1 : VTVec<16, i1, 22>; // 16 x i1 vector value -def v32i1 : VTVec<32, i1, 23>; // 32 x i1 vector value -def v64i1 : VTVec<64, i1, 24>; // 64 x i1 vector value -def v128i1 : VTVec<128, i1, 25>; // 128 x i1 vector value -def v256i1 : VTVec<256, i1, 26>; // 256 x i1 vector value -def v512i1 : VTVec<512, i1, 27>; // 512 x i1 vector value -def v1024i1 : VTVec<1024, i1, 28>; // 1024 x i1 vector value -def v2048i1 : VTVec<2048, i1, 29>; // 2048 x i1 vector value -def v4096i1 : VTVec<4096, i1, 30>; // 4096 x i1 vector value - -def v128i2 : VTVec<128, i2, 31>; // 128 x i2 vector value -def v256i2 : VTVec<256, i2, 32>; // 256 x i2 vector value - -def v64i4 : VTVec<64, i4, 33>; // 64 x i4 vector value -def v128i4 : VTVec<128, i4, 34>; // 128 x i4 vector value - -def v1i8 : VTVec<1, i8, 35>; // 1 x i8 vector value -def v2i8 : VTVec<2, i8, 36>; // 2 x i8 vector value -def v3i8 : VTVec<3, i8, 37>; // 3 x i8 vector value -def v4i8 : VTVec<4, i8, 38>; // 4 x i8 vector value -def v8i8 : VTVec<8, i8, 39>; // 8 x i8 vector value -def v16i8 : VTVec<16, i8, 40>; // 16 x i8 vector value -def v32i8 : VTVec<32, i8, 41>; // 32 x i8 vector value -def v64i8 : VTVec<64, i8, 42>; // 64 x i8 vector value -def v128i8 : VTVec<128, i8, 43>; // 128 x i8 vector value -def v256i8 : VTVec<256, i8, 44>; // 256 x i8 vector value -def v512i8 : VTVec<512, i8, 45>; // 512 x i8 vector value -def v1024i8 : VTVec<1024, i8, 46>; // 1024 x i8 vector value - -def v1i16 : VTVec<1, i16, 47>; // 1 x i16 vector value -def v2i16 : VTVec<2, i16, 48>; // 2 x i16 vector value -def v3i16 : VTVec<3, i16, 49>; // 3 x i16 vector value -def v4i16 : VTVec<4, i16, 50>; // 4 x i16 vector value -def v8i16 : VTVec<8, i16, 51>; // 8 x i16 vector value -def v16i16 : VTVec<16, i16, 52>; // 16 x i16 vector value -def v32i16 : VTVec<32, i16, 53>; // 32 x i16 vector value -def v64i16 : VTVec<64, i16, 54>; // 64 x i16 vector value -def v128i16 : VTVec<128, i16, 55>; // 128 x i16 vector value -def v256i16 : VTVec<256, i16, 56>; // 256 x i16 vector value -def v512i16 : VTVec<512, i16, 57>; // 512 x i16 vector value -def v4096i16 : VTVec<4096, i16, 58>; // 4096 x i16 vector value - -def v1i32 : VTVec<1, i32, 59>; // 1 x i32 vector value -def v2i32 : VTVec<2, i32, 60>; // 2 x i32 vector value -def v3i32 : VTVec<3, i32, 61>; // 3 x i32 vector value -def v4i32 : VTVec<4, i32, 62>; // 4 x i32 vector value -def v5i32 : VTVec<5, i32, 63>; // 5 x i32 vector value -def v6i32 : VTVec<6, i32, 64>; // 6 x f32 vector value -def v7i32 : VTVec<7, i32, 65>; // 7 x f32 vector value -def v8i32 : VTVec<8, i32, 66>; // 8 x i32 vector value -def v9i32 : VTVec<9, i32, 67>; // 9 x i32 vector value -def v10i32 : VTVec<10, i32, 68>; // 10 x i32 vector value -def v11i32 : VTVec<11, i32, 69>; // 11 x i32 vector value -def v12i32 : VTVec<12, i32, 70>; // 12 x i32 vector value -def v16i32 : VTVec<16, i32, 71>; // 16 x i32 vector value -def v32i32 : VTVec<32, i32, 72>; // 32 x i32 vector value -def v64i32 : VTVec<64, i32, 73>; // 64 x i32 vector value -def v128i32 : VTVec<128, i32, 74>; // 128 
x i32 vector value -def v256i32 : VTVec<256, i32, 75>; // 256 x i32 vector value -def v512i32 : VTVec<512, i32, 76>; // 512 x i32 vector value -def v1024i32 : VTVec<1024, i32, 77>; // 1024 x i32 vector value -def v2048i32 : VTVec<2048, i32, 78>; // 2048 x i32 vector value -def v4096i32 : VTVec<4096, i32, 79>; // 4096 x i32 vector value - -def v1i64 : VTVec<1, i64, 80>; // 1 x i64 vector value -def v2i64 : VTVec<2, i64, 81>; // 2 x i64 vector value -def v3i64 : VTVec<3, i64, 82>; // 3 x i64 vector value -def v4i64 : VTVec<4, i64, 83>; // 4 x i64 vector value -def v8i64 : VTVec<8, i64, 84>; // 8 x i64 vector value -def v16i64 : VTVec<16, i64, 85>; // 16 x i64 vector value -def v32i64 : VTVec<32, i64, 86>; // 32 x i64 vector value -def v64i64 : VTVec<64, i64, 87>; // 64 x i64 vector value -def v128i64 : VTVec<128, i64, 88>; // 128 x i64 vector value -def v256i64 : VTVec<256, i64, 89>; // 256 x i64 vector value - -def v1i128 : VTVec<1, i128, 90>; // 1 x i128 vector value - -def v1f16 : VTVec<1, f16, 91>; // 1 x f16 vector value -def v2f16 : VTVec<2, f16, 92>; // 2 x f16 vector value -def v3f16 : VTVec<3, f16, 93>; // 3 x f16 vector value -def v4f16 : VTVec<4, f16, 94>; // 4 x f16 vector value -def v8f16 : VTVec<8, f16, 95>; // 8 x f16 vector value -def v16f16 : VTVec<16, f16, 96>; // 16 x f16 vector value -def v32f16 : VTVec<32, f16, 97>; // 32 x f16 vector value -def v64f16 : VTVec<64, f16, 98>; // 64 x f16 vector value -def v128f16 : VTVec<128, f16, 99>; // 128 x f16 vector value -def v256f16 : VTVec<256, f16, 100>; // 256 x f16 vector value -def v512f16 : VTVec<512, f16, 101>; // 512 x f16 vector value -def v4096f16 : VTVec<4096, f16, 102>; // 4096 x f16 vector value - -def v1bf16 : VTVec<1, bf16, 103>; // 1 x bf16 vector value -def v2bf16 : VTVec<2, bf16, 104>; // 2 x bf16 vector value -def v3bf16 : VTVec<3, bf16, 105>; // 3 x bf16 vector value -def v4bf16 : VTVec<4, bf16, 106>; // 4 x bf16 vector value -def v8bf16 : VTVec<8, bf16, 107>; // 8 x bf16 vector value -def v16bf16 : VTVec<16, bf16, 108>; // 16 x bf16 vector value -def v32bf16 : VTVec<32, bf16, 109>; // 32 x bf16 vector value -def v64bf16 : VTVec<64, bf16, 110>; // 64 x bf16 vector value -def v128bf16 : VTVec<128, bf16, 111>; // 128 x bf16 vector value -def v4096bf16 : VTVec<4096, bf16, 112>; // 4096 x bf16 vector value - -def v1f32 : VTVec<1, f32, 113>; // 1 x f32 vector value -def v2f32 : VTVec<2, f32, 114>; // 2 x f32 vector value -def v3f32 : VTVec<3, f32, 115>; // 3 x f32 vector value -def v4f32 : VTVec<4, f32, 116>; // 4 x f32 vector value -def v5f32 : VTVec<5, f32, 117>; // 5 x f32 vector value -def v6f32 : VTVec<6, f32, 118>; // 6 x f32 vector value -def v7f32 : VTVec<7, f32, 119>; // 7 x f32 vector value -def v8f32 : VTVec<8, f32, 120>; // 8 x f32 vector value -def v9f32 : VTVec<9, f32, 121>; // 9 x f32 vector value -def v10f32 : VTVec<10, f32, 122>; // 10 x f32 vector value -def v11f32 : VTVec<11, f32, 123>; // 11 x f32 vector value -def v12f32 : VTVec<12, f32, 124>; // 12 x f32 vector value -def v16f32 : VTVec<16, f32, 125>; // 16 x f32 vector value -def v32f32 : VTVec<32, f32, 126>; // 32 x f32 vector value -def v64f32 : VTVec<64, f32, 127>; // 64 x f32 vector value -def v128f32 : VTVec<128, f32, 128>; // 128 x f32 vector value -def v256f32 : VTVec<256, f32, 129>; // 256 x f32 vector value -def v512f32 : VTVec<512, f32, 130>; // 512 x f32 vector value -def v1024f32 : VTVec<1024, f32, 131>; // 1024 x f32 vector value -def v2048f32 : VTVec<2048, f32, 132>; // 2048 x f32 vector value - -def v1f64 : VTVec<1, f64, 133>; // 
1 x f64 vector value -def v2f64 : VTVec<2, f64, 134>; // 2 x f64 vector value -def v3f64 : VTVec<3, f64, 135>; // 3 x f64 vector value -def v4f64 : VTVec<4, f64, 136>; // 4 x f64 vector value -def v8f64 : VTVec<8, f64, 137>; // 8 x f64 vector value -def v16f64 : VTVec<16, f64, 138>; // 16 x f64 vector value -def v32f64 : VTVec<32, f64, 139>; // 32 x f64 vector value -def v64f64 : VTVec<64, f64, 140>; // 64 x f64 vector value -def v128f64 : VTVec<128, f64, 141>; // 128 x f64 vector value -def v256f64 : VTVec<256, f64, 142>; // 256 x f64 vector value - -def nxv1i1 : VTScalableVec<1, i1, 143>; // n x 1 x i1 vector value -def nxv2i1 : VTScalableVec<2, i1, 144>; // n x 2 x i1 vector value -def nxv4i1 : VTScalableVec<4, i1, 145>; // n x 4 x i1 vector value -def nxv8i1 : VTScalableVec<8, i1, 146>; // n x 8 x i1 vector value -def nxv16i1 : VTScalableVec<16, i1, 147>; // n x 16 x i1 vector value -def nxv32i1 : VTScalableVec<32, i1, 148>; // n x 32 x i1 vector value -def nxv64i1 : VTScalableVec<64, i1, 149>; // n x 64 x i1 vector value - -def nxv1i8 : VTScalableVec<1, i8, 150>; // n x 1 x i8 vector value -def nxv2i8 : VTScalableVec<2, i8, 151>; // n x 2 x i8 vector value -def nxv4i8 : VTScalableVec<4, i8, 152>; // n x 4 x i8 vector value -def nxv8i8 : VTScalableVec<8, i8, 153>; // n x 8 x i8 vector value -def nxv16i8 : VTScalableVec<16, i8, 154>; // n x 16 x i8 vector value -def nxv32i8 : VTScalableVec<32, i8, 155>; // n x 32 x i8 vector value -def nxv64i8 : VTScalableVec<64, i8, 156>; // n x 64 x i8 vector value - -def nxv1i16 : VTScalableVec<1, i16, 157>; // n x 1 x i16 vector value -def nxv2i16 : VTScalableVec<2, i16, 158>; // n x 2 x i16 vector value -def nxv4i16 : VTScalableVec<4, i16, 159>; // n x 4 x i16 vector value -def nxv8i16 : VTScalableVec<8, i16, 160>; // n x 8 x i16 vector value -def nxv16i16 : VTScalableVec<16, i16, 161>; // n x 16 x i16 vector value -def nxv32i16 : VTScalableVec<32, i16, 162>; // n x 32 x i16 vector value - -def nxv1i32 : VTScalableVec<1, i32, 163>; // n x 1 x i32 vector value -def nxv2i32 : VTScalableVec<2, i32, 164>; // n x 2 x i32 vector value -def nxv4i32 : VTScalableVec<4, i32, 165>; // n x 4 x i32 vector value -def nxv8i32 : VTScalableVec<8, i32, 166>; // n x 8 x i32 vector value -def nxv16i32 : VTScalableVec<16, i32, 167>; // n x 16 x i32 vector value -def nxv32i32 : VTScalableVec<32, i32, 168>; // n x 32 x i32 vector value - -def nxv1i64 : VTScalableVec<1, i64, 169>; // n x 1 x i64 vector value -def nxv2i64 : VTScalableVec<2, i64, 170>; // n x 2 x i64 vector value -def nxv4i64 : VTScalableVec<4, i64, 171>; // n x 4 x i64 vector value -def nxv8i64 : VTScalableVec<8, i64, 172>; // n x 8 x i64 vector value -def nxv16i64 : VTScalableVec<16, i64, 173>; // n x 16 x i64 vector value -def nxv32i64 : VTScalableVec<32, i64, 174>; // n x 32 x i64 vector value - -def nxv1f16 : VTScalableVec<1, f16, 175>; // n x 1 x f16 vector value -def nxv2f16 : VTScalableVec<2, f16, 176>; // n x 2 x f16 vector value -def nxv4f16 : VTScalableVec<4, f16, 177>; // n x 4 x f16 vector value -def nxv8f16 : VTScalableVec<8, f16, 178>; // n x 8 x f16 vector value -def nxv16f16 : VTScalableVec<16, f16, 179>; // n x 16 x f16 vector value -def nxv32f16 : VTScalableVec<32, f16, 180>; // n x 32 x f16 vector value - -def nxv1bf16 : VTScalableVec<1, bf16, 181>; // n x 1 x bf16 vector value -def nxv2bf16 : VTScalableVec<2, bf16, 182>; // n x 2 x bf16 vector value -def nxv4bf16 : VTScalableVec<4, bf16, 183>; // n x 4 x bf16 vector value -def nxv8bf16 : VTScalableVec<8, bf16, 184>; // n x 8 x bf16 
vector value -def nxv16bf16 : VTScalableVec<16, bf16, 185>; // n x 16 x bf16 vector value -def nxv32bf16 : VTScalableVec<32, bf16, 186>; // n x 32 x bf16 vector value - -def nxv1f32 : VTScalableVec<1, f32, 187>; // n x 1 x f32 vector value -def nxv2f32 : VTScalableVec<2, f32, 188>; // n x 2 x f32 vector value -def nxv4f32 : VTScalableVec<4, f32, 189>; // n x 4 x f32 vector value -def nxv8f32 : VTScalableVec<8, f32, 190>; // n x 8 x f32 vector value -def nxv16f32 : VTScalableVec<16, f32, 191>; // n x 16 x f32 vector value - -def nxv1f64 : VTScalableVec<1, f64, 192>; // n x 1 x f64 vector value -def nxv2f64 : VTScalableVec<2, f64, 193>; // n x 2 x f64 vector value -def nxv4f64 : VTScalableVec<4, f64, 194>; // n x 4 x f64 vector value -def nxv8f64 : VTScalableVec<8, f64, 195>; // n x 8 x f64 vector value +def v5i1 : VTVec<5, i1, 21>; // 5 x i1 vector value +def v6i1 : VTVec<6, i1, 22>; // 6 x i1 vector value +def v7i1 : VTVec<7, i1, 23>; // 7 x i1 vector value +def v8i1 : VTVec<8, i1, 24>; // 8 x i1 vector value +def v16i1 : VTVec<16, i1, 25>; // 16 x i1 vector value +def v32i1 : VTVec<32, i1, 26>; // 32 x i1 vector value +def v64i1 : VTVec<64, i1, 27>; // 64 x i1 vector value +def v128i1 : VTVec<128, i1, 28>; // 128 x i1 vector value +def v256i1 : VTVec<256, i1, 29>; // 256 x i1 vector value +def v512i1 : VTVec<512, i1, 30>; // 512 x i1 vector value +def v1024i1 : VTVec<1024, i1, 31>; // 1024 x i1 vector value +def v2048i1 : VTVec<2048, i1, 32>; // 2048 x i1 vector value +def v4096i1 : VTVec<4096, i1, 33>; // 4096 x i1 vector value + +def v128i2 : VTVec<128, i2, 34>; // 128 x i2 vector value +def v256i2 : VTVec<256, i2, 35>; // 256 x i2 vector value + +def v64i4 : VTVec<64, i4, 36>; // 64 x i4 vector value +def v128i4 : VTVec<128, i4, 37>; // 128 x i4 vector value + +def v1i8 : VTVec<1, i8, 38>; // 1 x i8 vector value +def v2i8 : VTVec<2, i8, 39>; // 2 x i8 vector value +def v3i8 : VTVec<3, i8, 40>; // 3 x i8 vector value +def v4i8 : VTVec<4, i8, 41>; // 4 x i8 vector value +def v5i8 : VTVec<5, i8, 42>; // 5 x i8 vector value +def v6i8 : VTVec<6, i8, 43>; // 6 x i8 vector value +def v7i8 : VTVec<7, i8, 44>; // 7 x i8 vector value +def v8i8 : VTVec<8, i8, 45>; // 8 x i8 vector value +def v16i8 : VTVec<16, i8, 46>; // 16 x i8 vector value +def v32i8 : VTVec<32, i8, 47>; // 32 x i8 vector value +def v64i8 : VTVec<64, i8, 48>; // 64 x i8 vector value +def v128i8 : VTVec<128, i8, 49>; // 128 x i8 vector value +def v256i8 : VTVec<256, i8, 50>; // 256 x i8 vector value +def v512i8 : VTVec<512, i8, 51>; // 512 x i8 vector value +def v1024i8 : VTVec<1024, i8, 52>; // 1024 x i8 vector value + +def v1i16 : VTVec<1, i16, 53>; // 1 x i16 vector value +def v2i16 : VTVec<2, i16, 54>; // 2 x i16 vector value +def v3i16 : VTVec<3, i16, 55>; // 3 x i16 vector value +def v4i16 : VTVec<4, i16, 56>; // 4 x i16 vector value +def v5i16 : VTVec<5, i16, 57>; // 5 x i16 vector value +def v6i16 : VTVec<6, i16, 58>; // 6 x i16 vector value +def v7i16 : VTVec<7, i16, 59>; // 7 x i16 vector value +def v8i16 : VTVec<8, i16, 60>; // 8 x i16 vector value +def v16i16 : VTVec<16, i16, 61>; // 16 x i16 vector value +def v32i16 : VTVec<32, i16, 62>; // 32 x i16 vector value +def v64i16 : VTVec<64, i16, 63>; // 64 x i16 vector value +def v128i16 : VTVec<128, i16, 64>; // 128 x i16 vector value +def v256i16 : VTVec<256, i16, 65>; // 256 x i16 vector value +def v512i16 : VTVec<512, i16, 66>; // 512 x i16 vector value +def v4096i16 : VTVec<4096, i16, 67>; // 4096 x i16 vector value + +def v1i32 : VTVec<1, i32, 68>; // 1 x i32 
vector value +def v2i32 : VTVec<2, i32, 69>; // 2 x i32 vector value +def v3i32 : VTVec<3, i32, 70>; // 3 x i32 vector value +def v4i32 : VTVec<4, i32, 71>; // 4 x i32 vector value +def v5i32 : VTVec<5, i32, 72>; // 5 x i32 vector value +def v6i32 : VTVec<6, i32, 73>; // 6 x i32 vector value +def v7i32 : VTVec<7, i32, 74>; // 7 x i32 vector value +def v8i32 : VTVec<8, i32, 75>; // 8 x i32 vector value +def v9i32 : VTVec<9, i32, 76>; // 9 x i32 vector value +def v10i32 : VTVec<10, i32, 77>; // 10 x i32 vector value +def v11i32 : VTVec<11, i32, 78>; // 11 x i32 vector value +def v12i32 : VTVec<12, i32, 79>; // 12 x i32 vector value +def v16i32 : VTVec<16, i32, 80>; // 16 x i32 vector value +def v32i32 : VTVec<32, i32, 81>; // 32 x i32 vector value +def v64i32 : VTVec<64, i32, 82>; // 64 x i32 vector value +def v128i32 : VTVec<128, i32, 83>; // 128 x i32 vector value +def v256i32 : VTVec<256, i32, 84>; // 256 x i32 vector value +def v512i32 : VTVec<512, i32, 85>; // 512 x i32 vector value +def v1024i32 : VTVec<1024, i32, 86>; // 1024 x i32 vector value +def v2048i32 : VTVec<2048, i32, 87>; // 2048 x i32 vector value +def v4096i32 : VTVec<4096, i32, 88>; // 4096 x i32 vector value + +def v1i64 : VTVec<1, i64, 89>; // 1 x i64 vector value +def v2i64 : VTVec<2, i64, 90>; // 2 x i64 vector value +def v3i64 : VTVec<3, i64, 91>; // 3 x i64 vector value +def v4i64 : VTVec<4, i64, 92>; // 4 x i64 vector value +def v8i64 : VTVec<8, i64, 93>; // 8 x i64 vector value +def v16i64 : VTVec<16, i64, 94>; // 16 x i64 vector value +def v32i64 : VTVec<32, i64, 95>; // 32 x i64 vector value +def v64i64 : VTVec<64, i64, 96>; // 64 x i64 vector value +def v128i64 : VTVec<128, i64, 97>; // 128 x i64 vector value +def v256i64 : VTVec<256, i64, 98>; // 256 x i64 vector value + +def v1i128 : VTVec<1, i128, 99>; // 1 x i128 vector value + +def v1f16 : VTVec<1, f16, 100>; // 1 x f16 vector value +def v2f16 : VTVec<2, f16, 101>; // 2 x f16 vector value +def v3f16 : VTVec<3, f16, 102>; // 3 x f16 vector value +def v4f16 : VTVec<4, f16, 103>; // 4 x f16 vector value +def v5f16 : VTVec<5, f16, 104>; // 5 x f16 vector value +def v6f16 : VTVec<6, f16, 105>; // 6 x f16 vector value +def v7f16 : VTVec<7, f16, 106>; // 7 x f16 vector value +def v8f16 : VTVec<8, f16, 107>; // 8 x f16 vector value +def v16f16 : VTVec<16, f16, 108>; // 16 x f16 vector value +def v32f16 : VTVec<32, f16, 109>; // 32 x f16 vector value +def v64f16 : VTVec<64, f16, 110>; // 64 x f16 vector value +def v128f16 : VTVec<128, f16, 111>; // 128 x f16 vector value +def v256f16 : VTVec<256, f16, 112>; // 256 x f16 vector value +def v512f16 : VTVec<512, f16, 113>; // 512 x f16 vector value +def v4096f16 : VTVec<4096, f16, 114>; // 4096 x f16 vector value + +def v1bf16 : VTVec<1, bf16, 115>; // 1 x bf16 vector value +def v2bf16 : VTVec<2, bf16, 116>; // 2 x bf16 vector value +def v3bf16 : VTVec<3, bf16, 117>; // 3 x bf16 vector value +def v4bf16 : VTVec<4, bf16, 118>; // 4 x bf16 vector value +def v8bf16 : VTVec<8, bf16, 119>; // 8 x bf16 vector value +def v16bf16 : VTVec<16, bf16, 120>; // 16 x bf16 vector value +def v32bf16 : VTVec<32, bf16, 121>; // 32 x bf16 vector value +def v64bf16 : VTVec<64, bf16, 122>; // 64 x bf16 vector value +def v128bf16 : VTVec<128, bf16, 123>; // 128 x bf16 vector value +def v4096bf16 : VTVec<4096, bf16, 124>; // 4096 x bf16 vector value + +def v1f32 : VTVec<1, f32, 125>; // 1 x f32 vector value +def v2f32 : VTVec<2, f32, 126>; // 2 x f32 vector value +def v3f32 : VTVec<3, f32, 127>; // 3 x f32 vector value +def v4f32 : VTVec<4, 
f32, 128>; // 4 x f32 vector value +def v5f32 : VTVec<5, f32, 129>; // 5 x f32 vector value +def v6f32 : VTVec<6, f32, 130>; // 6 x f32 vector value +def v7f32 : VTVec<7, f32, 131>; // 7 x f32 vector value +def v8f32 : VTVec<8, f32, 132>; // 8 x f32 vector value +def v9f32 : VTVec<9, f32, 133>; // 9 x f32 vector value +def v10f32 : VTVec<10, f32, 134>; // 10 x f32 vector value +def v11f32 : VTVec<11, f32, 135>; // 11 x f32 vector value +def v12f32 : VTVec<12, f32, 136>; // 12 x f32 vector value +def v16f32 : VTVec<16, f32, 137>; // 16 x f32 vector value +def v32f32 : VTVec<32, f32, 138>; // 32 x f32 vector value +def v64f32 : VTVec<64, f32, 139>; // 64 x f32 vector value +def v128f32 : VTVec<128, f32, 140>; // 128 x f32 vector value +def v256f32 : VTVec<256, f32, 141>; // 256 x f32 vector value +def v512f32 : VTVec<512, f32, 142>; // 512 x f32 vector value +def v1024f32 : VTVec<1024, f32, 143>; // 1024 x f32 vector value +def v2048f32 : VTVec<2048, f32, 144>; // 2048 x f32 vector value + +def v1f64 : VTVec<1, f64, 145>; // 1 x f64 vector value +def v2f64 : VTVec<2, f64, 146>; // 2 x f64 vector value +def v3f64 : VTVec<3, f64, 147>; // 3 x f64 vector value +def v4f64 : VTVec<4, f64, 148>; // 4 x f64 vector value +def v8f64 : VTVec<8, f64, 149>; // 8 x f64 vector value +def v16f64 : VTVec<16, f64, 150>; // 16 x f64 vector value +def v32f64 : VTVec<32, f64, 151>; // 32 x f64 vector value +def v64f64 : VTVec<64, f64, 152>; // 64 x f64 vector value +def v128f64 : VTVec<128, f64, 153>; // 128 x f64 vector value +def v256f64 : VTVec<256, f64, 154>; // 256 x f64 vector value + +def nxv1i1 : VTScalableVec<1, i1, 155>; // n x 1 x i1 vector value +def nxv2i1 : VTScalableVec<2, i1, 156>; // n x 2 x i1 vector value +def nxv4i1 : VTScalableVec<4, i1, 157>; // n x 4 x i1 vector value +def nxv8i1 : VTScalableVec<8, i1, 158>; // n x 8 x i1 vector value +def nxv16i1 : VTScalableVec<16, i1, 159>; // n x 16 x i1 vector value +def nxv32i1 : VTScalableVec<32, i1, 160>; // n x 32 x i1 vector value +def nxv64i1 : VTScalableVec<64, i1, 161>; // n x 64 x i1 vector value + +def nxv1i8 : VTScalableVec<1, i8, 162>; // n x 1 x i8 vector value +def nxv2i8 : VTScalableVec<2, i8, 163>; // n x 2 x i8 vector value +def nxv4i8 : VTScalableVec<4, i8, 164>; // n x 4 x i8 vector value +def nxv8i8 : VTScalableVec<8, i8, 165>; // n x 8 x i8 vector value +def nxv16i8 : VTScalableVec<16, i8, 166>; // n x 16 x i8 vector value +def nxv32i8 : VTScalableVec<32, i8, 167>; // n x 32 x i8 vector value +def nxv64i8 : VTScalableVec<64, i8, 168>; // n x 64 x i8 vector value + +def nxv1i16 : VTScalableVec<1, i16, 169>; // n x 1 x i16 vector value +def nxv2i16 : VTScalableVec<2, i16, 170>; // n x 2 x i16 vector value +def nxv4i16 : VTScalableVec<4, i16, 171>; // n x 4 x i16 vector value +def nxv8i16 : VTScalableVec<8, i16, 172>; // n x 8 x i16 vector value +def nxv16i16 : VTScalableVec<16, i16, 173>; // n x 16 x i16 vector value +def nxv32i16 : VTScalableVec<32, i16, 174>; // n x 32 x i16 vector value + +def nxv1i32 : VTScalableVec<1, i32, 175>; // n x 1 x i32 vector value +def nxv2i32 : VTScalableVec<2, i32, 176>; // n x 2 x i32 vector value +def nxv4i32 : VTScalableVec<4, i32, 177>; // n x 4 x i32 vector value +def nxv8i32 : VTScalableVec<8, i32, 178>; // n x 8 x i32 vector value +def nxv16i32 : VTScalableVec<16, i32, 179>; // n x 16 x i32 vector value +def nxv32i32 : VTScalableVec<32, i32, 180>; // n x 32 x i32 vector value + +def nxv1i64 : VTScalableVec<1, i64, 181>; // n x 1 x i64 vector value +def nxv2i64 : VTScalableVec<2, i64, 182>; // 
n x 2 x i64 vector value +def nxv4i64 : VTScalableVec<4, i64, 183>; // n x 4 x i64 vector value +def nxv8i64 : VTScalableVec<8, i64, 184>; // n x 8 x i64 vector value +def nxv16i64 : VTScalableVec<16, i64, 185>; // n x 16 x i64 vector value +def nxv32i64 : VTScalableVec<32, i64, 186>; // n x 32 x i64 vector value + +def nxv1f16 : VTScalableVec<1, f16, 187>; // n x 1 x f16 vector value +def nxv2f16 : VTScalableVec<2, f16, 188>; // n x 2 x f16 vector value +def nxv4f16 : VTScalableVec<4, f16, 189>; // n x 4 x f16 vector value +def nxv8f16 : VTScalableVec<8, f16, 190>; // n x 8 x f16 vector value +def nxv16f16 : VTScalableVec<16, f16, 191>; // n x 16 x f16 vector value +def nxv32f16 : VTScalableVec<32, f16, 192>; // n x 32 x f16 vector value + +def nxv1bf16 : VTScalableVec<1, bf16, 193>; // n x 1 x bf16 vector value +def nxv2bf16 : VTScalableVec<2, bf16, 194>; // n x 2 x bf16 vector value +def nxv4bf16 : VTScalableVec<4, bf16, 195>; // n x 4 x bf16 vector value +def nxv8bf16 : VTScalableVec<8, bf16, 196>; // n x 8 x bf16 vector value +def nxv16bf16 : VTScalableVec<16, bf16, 197>; // n x 16 x bf16 vector value +def nxv32bf16 : VTScalableVec<32, bf16, 198>; // n x 32 x bf16 vector value + +def nxv1f32 : VTScalableVec<1, f32, 199>; // n x 1 x f32 vector value +def nxv2f32 : VTScalableVec<2, f32, 200>; // n x 2 x f32 vector value +def nxv4f32 : VTScalableVec<4, f32, 201>; // n x 4 x f32 vector value +def nxv8f32 : VTScalableVec<8, f32, 202>; // n x 8 x f32 vector value +def nxv16f32 : VTScalableVec<16, f32, 203>; // n x 16 x f32 vector value + +def nxv1f64 : VTScalableVec<1, f64, 204>; // n x 1 x f64 vector value +def nxv2f64 : VTScalableVec<2, f64, 205>; // n x 2 x f64 vector value +def nxv4f64 : VTScalableVec<4, f64, 206>; // n x 4 x f64 vector value +def nxv8f64 : VTScalableVec<8, f64, 207>; // n x 8 x f64 vector value // Sz = NF * MinNumElts * 8(bits) -def riscv_nxv1i8x2 : VTVecTup<16, 2, i8, 196>; // RISCV vector tuple(min_num_elts=1, nf=2) -def riscv_nxv1i8x3 : VTVecTup<24, 3, i8, 197>; // RISCV vector tuple(min_num_elts=1, nf=3) -def riscv_nxv1i8x4 : VTVecTup<32, 4, i8, 198>; // RISCV vector tuple(min_num_elts=1, nf=4) -def riscv_nxv1i8x5 : VTVecTup<40, 5, i8, 199>; // RISCV vector tuple(min_num_elts=1, nf=5) -def riscv_nxv1i8x6 : VTVecTup<48, 6, i8, 200>; // RISCV vector tuple(min_num_elts=1, nf=6) -def riscv_nxv1i8x7 : VTVecTup<56, 7, i8, 201>; // RISCV vector tuple(min_num_elts=1, nf=7) -def riscv_nxv1i8x8 : VTVecTup<64, 8, i8, 202>; // RISCV vector tuple(min_num_elts=1, nf=8) -def riscv_nxv2i8x2 : VTVecTup<32, 2, i8, 203>; // RISCV vector tuple(min_num_elts=2, nf=2) -def riscv_nxv2i8x3 : VTVecTup<48, 3, i8, 204>; // RISCV vector tuple(min_num_elts=2, nf=3) -def riscv_nxv2i8x4 : VTVecTup<64, 4, i8, 205>; // RISCV vector tuple(min_num_elts=2, nf=4) -def riscv_nxv2i8x5 : VTVecTup<80, 5, i8, 206>; // RISCV vector tuple(min_num_elts=2, nf=5) -def riscv_nxv2i8x6 : VTVecTup<96, 6, i8, 207>; // RISCV vector tuple(min_num_elts=2, nf=6) -def riscv_nxv2i8x7 : VTVecTup<112, 7, i8, 208>; // RISCV vector tuple(min_num_elts=2, nf=7) -def riscv_nxv2i8x8 : VTVecTup<128, 8, i8, 209>; // RISCV vector tuple(min_num_elts=2, nf=8) -def riscv_nxv4i8x2 : VTVecTup<64, 2, i8, 210>; // RISCV vector tuple(min_num_elts=4, nf=2) -def riscv_nxv4i8x3 : VTVecTup<96, 3, i8, 211>; // RISCV vector tuple(min_num_elts=4, nf=3) -def riscv_nxv4i8x4 : VTVecTup<128, 4, i8, 212>; // RISCV vector tuple(min_num_elts=4, nf=4) -def riscv_nxv4i8x5 : VTVecTup<160, 5, i8, 213>; // RISCV vector tuple(min_num_elts=4, nf=5) -def 
riscv_nxv4i8x6 : VTVecTup<192, 6, i8, 214>; // RISCV vector tuple(min_num_elts=4, nf=6) -def riscv_nxv4i8x7 : VTVecTup<224, 7, i8, 215>; // RISCV vector tuple(min_num_elts=4, nf=7) -def riscv_nxv4i8x8 : VTVecTup<256, 8, i8, 216>; // RISCV vector tuple(min_num_elts=4, nf=8) -def riscv_nxv8i8x2 : VTVecTup<128, 2, i8, 217>; // RISCV vector tuple(min_num_elts=8, nf=2) -def riscv_nxv8i8x3 : VTVecTup<192, 3, i8, 218>; // RISCV vector tuple(min_num_elts=8, nf=3) -def riscv_nxv8i8x4 : VTVecTup<256, 4, i8, 219>; // RISCV vector tuple(min_num_elts=8, nf=4) -def riscv_nxv8i8x5 : VTVecTup<320, 5, i8, 220>; // RISCV vector tuple(min_num_elts=8, nf=5) -def riscv_nxv8i8x6 : VTVecTup<384, 6, i8, 221>; // RISCV vector tuple(min_num_elts=8, nf=6) -def riscv_nxv8i8x7 : VTVecTup<448, 7, i8, 222>; // RISCV vector tuple(min_num_elts=8, nf=7) -def riscv_nxv8i8x8 : VTVecTup<512, 8, i8, 223>; // RISCV vector tuple(min_num_elts=8, nf=8) -def riscv_nxv16i8x2 : VTVecTup<256, 2, i8, 224>; // RISCV vector tuple(min_num_elts=16, nf=2) -def riscv_nxv16i8x3 : VTVecTup<384, 3, i8, 225>; // RISCV vector tuple(min_num_elts=16, nf=3) -def riscv_nxv16i8x4 : VTVecTup<512, 4, i8, 226>; // RISCV vector tuple(min_num_elts=16, nf=4) -def riscv_nxv32i8x2 : VTVecTup<512, 2, i8, 227>; // RISCV vector tuple(min_num_elts=32, nf=2) - -def x86mmx : ValueType<64, 228>; // X86 MMX value -def Glue : ValueType<0, 229>; // Pre-RA sched glue -def isVoid : ValueType<0, 230>; // Produces no value -def untyped : ValueType<8, 231> { // Produces an untyped value +def riscv_nxv1i8x2 : VTVecTup<16, 2, i8, 208>; // RISCV vector tuple(min_num_elts=1, nf=2) +def riscv_nxv1i8x3 : VTVecTup<24, 3, i8, 209>; // RISCV vector tuple(min_num_elts=1, nf=3) +def riscv_nxv1i8x4 : VTVecTup<32, 4, i8, 210>; // RISCV vector tuple(min_num_elts=1, nf=4) +def riscv_nxv1i8x5 : VTVecTup<40, 5, i8, 211>; // RISCV vector tuple(min_num_elts=1, nf=5) +def riscv_nxv1i8x6 : VTVecTup<48, 6, i8, 212>; // RISCV vector tuple(min_num_elts=1, nf=6) +def riscv_nxv1i8x7 : VTVecTup<56, 7, i8, 213>; // RISCV vector tuple(min_num_elts=1, nf=7) +def riscv_nxv1i8x8 : VTVecTup<64, 8, i8, 214>; // RISCV vector tuple(min_num_elts=1, nf=8) +def riscv_nxv2i8x2 : VTVecTup<32, 2, i8, 215>; // RISCV vector tuple(min_num_elts=2, nf=2) +def riscv_nxv2i8x3 : VTVecTup<48, 3, i8, 216>; // RISCV vector tuple(min_num_elts=2, nf=3) +def riscv_nxv2i8x4 : VTVecTup<64, 4, i8, 217>; // RISCV vector tuple(min_num_elts=2, nf=4) +def riscv_nxv2i8x5 : VTVecTup<80, 5, i8, 218>; // RISCV vector tuple(min_num_elts=2, nf=5) +def riscv_nxv2i8x6 : VTVecTup<96, 6, i8, 219>; // RISCV vector tuple(min_num_elts=2, nf=6) +def riscv_nxv2i8x7 : VTVecTup<112, 7, i8, 220>; // RISCV vector tuple(min_num_elts=2, nf=7) +def riscv_nxv2i8x8 : VTVecTup<128, 8, i8, 221>; // RISCV vector tuple(min_num_elts=2, nf=8) +def riscv_nxv4i8x2 : VTVecTup<64, 2, i8, 222>; // RISCV vector tuple(min_num_elts=4, nf=2) +def riscv_nxv4i8x3 : VTVecTup<96, 3, i8, 223>; // RISCV vector tuple(min_num_elts=4, nf=3) +def riscv_nxv4i8x4 : VTVecTup<128, 4, i8, 224>; // RISCV vector tuple(min_num_elts=4, nf=4) +def riscv_nxv4i8x5 : VTVecTup<160, 5, i8, 225>; // RISCV vector tuple(min_num_elts=4, nf=5) +def riscv_nxv4i8x6 : VTVecTup<192, 6, i8, 226>; // RISCV vector tuple(min_num_elts=4, nf=6) +def riscv_nxv4i8x7 : VTVecTup<224, 7, i8, 227>; // RISCV vector tuple(min_num_elts=4, nf=7) +def riscv_nxv4i8x8 : VTVecTup<256, 8, i8, 228>; // RISCV vector tuple(min_num_elts=4, nf=8) +def riscv_nxv8i8x2 : VTVecTup<128, 2, i8, 229>; // RISCV vector tuple(min_num_elts=8, 
nf=2) +def riscv_nxv8i8x3 : VTVecTup<192, 3, i8, 230>; // RISCV vector tuple(min_num_elts=8, nf=3) +def riscv_nxv8i8x4 : VTVecTup<256, 4, i8, 231>; // RISCV vector tuple(min_num_elts=8, nf=4) +def riscv_nxv8i8x5 : VTVecTup<320, 5, i8, 232>; // RISCV vector tuple(min_num_elts=8, nf=5) +def riscv_nxv8i8x6 : VTVecTup<384, 6, i8, 233>; // RISCV vector tuple(min_num_elts=8, nf=6) +def riscv_nxv8i8x7 : VTVecTup<448, 7, i8, 234>; // RISCV vector tuple(min_num_elts=8, nf=7) +def riscv_nxv8i8x8 : VTVecTup<512, 8, i8, 235>; // RISCV vector tuple(min_num_elts=8, nf=8) +def riscv_nxv16i8x2 : VTVecTup<256, 2, i8, 236>; // RISCV vector tuple(min_num_elts=16, nf=2) +def riscv_nxv16i8x3 : VTVecTup<384, 3, i8, 237>; // RISCV vector tuple(min_num_elts=16, nf=3) +def riscv_nxv16i8x4 : VTVecTup<512, 4, i8, 238>; // RISCV vector tuple(min_num_elts=16, nf=4) +def riscv_nxv32i8x2 : VTVecTup<512, 2, i8, 239>; // RISCV vector tuple(min_num_elts=32, nf=2) + +def x86mmx : ValueType<64, 240>; // X86 MMX value +def Glue : ValueType<0, 241>; // Pre-RA sched glue +def isVoid : ValueType<0, 242>; // Produces no value +def untyped : ValueType<8, 243> { // Produces an untyped value let LLVMName = "Untyped"; } -def funcref : ValueType<0, 232>; // WebAssembly's funcref type -def externref : ValueType<0, 233>; // WebAssembly's externref type -def exnref : ValueType<0, 234>; // WebAssembly's exnref type -def x86amx : ValueType<8192, 235>; // X86 AMX value -def i64x8 : ValueType<512, 236>; // 8 Consecutive GPRs (AArch64) +def funcref : ValueType<0, 244>; // WebAssembly's funcref type +def externref : ValueType<0, 245>; // WebAssembly's externref type +def exnref : ValueType<0, 246>; // WebAssembly's exnref type +def x86amx : ValueType<8192, 247>; // X86 AMX value +def i64x8 : ValueType<512, 248>; // 8 Consecutive GPRs (AArch64) def aarch64svcount - : ValueType<16, 237>; // AArch64 predicate-as-counter -def spirvbuiltin : ValueType<0, 238>; // SPIR-V's builtin type + : ValueType<16, 249>; // AArch64 predicate-as-counter +def spirvbuiltin : ValueType<0, 250>; // SPIR-V's builtin type // AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation. // FIXME: Remove this and the getPointerType() override if MVT::i160 is added. -def amdgpuBufferFatPointer : ValueType<160, 239>; +def amdgpuBufferFatPointer : ValueType<160, 251>; // AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR. // FIXME: Remove this and the getPointerType() override if MVT::i82 is added. -def amdgpuBufferStridedPointer : ValueType<192, 240>; +def amdgpuBufferStridedPointer : ValueType<192, 252>; -def aarch64mfp8 : ValueType<8, 241>; // 8-bit value in FPR (AArch64) +def aarch64mfp8 : ValueType<8, 253>; // 8-bit value in FPR (AArch64) let isNormalValueType = false in { def token : ValueType<0, 504>; // TokenTy diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h new file mode 100644 index 0000000..70a2eeb --- /dev/null +++ b/llvm/include/llvm/Frontend/HLSL/HLSLBinding.h @@ -0,0 +1,162 @@ +//===- HLSLBinding.h - Representation for resource bindings in HLSL -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains objects to represent resource bindings. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FRONTEND_HLSL_HLSLBINDING_H +#define LLVM_FRONTEND_HLSL_HLSLBINDING_H + +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DXILABI.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace hlsl { + +/// BindingInfo represents the ranges of bindings and free space for each +/// `dxil::ResourceClass`. This can represent HLSL-level bindings as well as +/// bindings described in root signatures, and can be used for analysis of +/// overlapping or missing bindings as well as for finding space for implicit +/// bindings. +/// +/// As an example, given these resource bindings: +/// +/// RWBuffer<float> A[10] : register(u3); +/// RWBuffer<float> B[] : register(u5, space2) +/// +/// The binding info for UAV bindings should look like this: +/// +/// UAVSpaces { +/// ResClass = ResourceClass::UAV, +/// Spaces = { +/// { Space = 0u, FreeRanges = {{ 0u, 2u }, { 13u, ~0u }} }, +/// { Space = 2u, FreeRanges = {{ 0u, 4u }} } +/// } +/// } +class BindingInfo { +public: + struct BindingRange { + uint32_t LowerBound; + uint32_t UpperBound; + BindingRange(uint32_t LB, uint32_t UB) : LowerBound(LB), UpperBound(UB) {} + }; + + struct RegisterSpace { + uint32_t Space; + SmallVector<BindingRange> FreeRanges; + RegisterSpace(uint32_t Space) : Space(Space) { + FreeRanges.emplace_back(0, ~0u); + } + // Size == -1 means unbounded array + LLVM_ABI std::optional<uint32_t> findAvailableBinding(int32_t Size); + }; + + struct BindingSpaces { + dxil::ResourceClass RC; + llvm::SmallVector<RegisterSpace> Spaces; + BindingSpaces(dxil::ResourceClass RC) : RC(RC) {} + LLVM_ABI RegisterSpace &getOrInsertSpace(uint32_t Space); + }; + +private: + BindingSpaces SRVSpaces{dxil::ResourceClass::SRV}; + BindingSpaces UAVSpaces{dxil::ResourceClass::UAV}; + BindingSpaces CBufferSpaces{dxil::ResourceClass::CBuffer}; + BindingSpaces SamplerSpaces{dxil::ResourceClass::Sampler}; + +public: + BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: + return SRVSpaces; + case dxil::ResourceClass::UAV: + return UAVSpaces; + case dxil::ResourceClass::CBuffer: + return CBufferSpaces; + case dxil::ResourceClass::Sampler: + return SamplerSpaces; + } + + llvm_unreachable("Invalid resource class"); + } + const BindingSpaces &getBindingSpaces(dxil::ResourceClass RC) const { + return const_cast<BindingInfo *>(this)->getBindingSpaces(RC); + } + + // Size == -1 means unbounded array + LLVM_ABI std::optional<uint32_t> + findAvailableBinding(dxil::ResourceClass RC, uint32_t Space, int32_t Size); + + friend class BindingInfoBuilder; +}; + +/// Builder class for creating a /c BindingInfo. 
+class BindingInfoBuilder { +public: + struct Binding { + dxil::ResourceClass RC; + uint32_t Space; + uint32_t LowerBound; + uint32_t UpperBound; + const void *Cookie; + + Binding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound, + uint32_t UpperBound, const void *Cookie) + : RC(RC), Space(Space), LowerBound(LowerBound), UpperBound(UpperBound), + Cookie(Cookie) {} + + bool isUnbounded() const { return UpperBound == ~0U; } + + bool operator==(const Binding &RHS) const { + return std::tie(RC, Space, LowerBound, UpperBound, Cookie) == + std::tie(RHS.RC, RHS.Space, RHS.LowerBound, RHS.UpperBound, + RHS.Cookie); + } + bool operator!=(const Binding &RHS) const { return !(*this == RHS); } + + bool operator<(const Binding &RHS) const { + return std::tie(RC, Space, LowerBound) < + std::tie(RHS.RC, RHS.Space, RHS.LowerBound); + } + }; + +private: + SmallVector<Binding> Bindings; + +public: + void trackBinding(dxil::ResourceClass RC, uint32_t Space, uint32_t LowerBound, + uint32_t UpperBound, const void *Cookie) { + Bindings.emplace_back(RC, Space, LowerBound, UpperBound, Cookie); + } + /// Calculate the binding info - \c ReportOverlap will be called once for each + /// overlapping binding. + BindingInfo calculateBindingInfo( + llvm::function_ref<void(const BindingInfoBuilder &Builder, + const Binding &Overlapping)> + ReportOverlap); + + /// Calculate the binding info - \c HasOverlap will be set to indicate whether + /// there are any overlapping bindings. + BindingInfo calculateBindingInfo(bool &HasOverlap) { + HasOverlap = false; + return calculateBindingInfo( + [&HasOverlap](auto, auto) { HasOverlap = true; }); + } + + /// For use in the \c ReportOverlap callback of \c calculateBindingInfo - + /// finds a binding that the \c ReportedBinding overlaps with. 
+ const Binding &findOverlapping(const Binding &ReportedBinding) const; +}; + +} // namespace hlsl +} // namespace llvm + +#endif // LLVM_FRONTEND_HLSL_HLSLBINDING_H diff --git a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h index 6fa51ed..0bd0774 100644 --- a/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h +++ b/llvm/include/llvm/Frontend/HLSL/RootSignatureMetadata.h @@ -14,6 +14,7 @@ #ifndef LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H #define LLVM_FRONTEND_HLSL_ROOTSIGNATUREMETADATA_H +#include "llvm/ADT/StringRef.h" #include "llvm/Frontend/HLSL/HLSLRootSignature.h" #include "llvm/IR/Constants.h" #include "llvm/MC/DXContainerRootSignature.h" @@ -26,6 +27,80 @@ class Metadata; namespace hlsl { namespace rootsig { +template <typename T> +class RootSignatureValidationError + : public ErrorInfo<RootSignatureValidationError<T>> { +public: + static char ID; + StringRef ParamName; + T Value; + + RootSignatureValidationError(StringRef ParamName, T Value) + : ParamName(ParamName), Value(Value) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid value for " << ParamName << ": " << Value; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class GenericRSMetadataError : public ErrorInfo<GenericRSMetadataError> { +public: + static char ID; + StringRef Message; + MDNode *MD; + + GenericRSMetadataError(StringRef Message, MDNode *MD) + : Message(Message), MD(MD) {} + + void log(raw_ostream &OS) const override { + OS << Message; + if (MD) { + OS << "\n"; + MD->printTree(OS); + } + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class InvalidRSMetadataFormat : public ErrorInfo<InvalidRSMetadataFormat> { +public: + static char ID; + StringRef ElementName; + + InvalidRSMetadataFormat(StringRef ElementName) : ElementName(ElementName) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid format for " << ElementName; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +class InvalidRSMetadataValue : public ErrorInfo<InvalidRSMetadataValue> { +public: + static char ID; + StringRef ParamName; + + InvalidRSMetadataValue(StringRef ParamName) : ParamName(ParamName) {} + + void log(raw_ostream &OS) const override { + OS << "Invalid value for " << ParamName; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + class MetadataBuilder { public: MetadataBuilder(llvm::LLVMContext &Ctx, ArrayRef<RootElement> Elements) @@ -66,29 +141,27 @@ class MetadataParser { public: MetadataParser(MDNode *Root) : Root(Root) {} - LLVM_ABI bool ParseRootSignature(LLVMContext *Ctx, - mcdxbc::RootSignatureDesc &RSD); + LLVM_ABI llvm::Expected<llvm::mcdxbc::RootSignatureDesc> + ParseRootSignature(uint32_t Version); private: - bool parseRootFlags(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootFlagNode); - bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootConstantNode); - bool parseRootDescriptors(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *RootDescriptorNode, - RootSignatureElementKind ElementKind); - bool parseDescriptorRange(LLVMContext *Ctx, mcdxbc::DescriptorTable &Table, - MDNode *RangeDescriptorNode); - bool parseDescriptorTable(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *DescriptorTableNode); - bool 
parseRootSignatureElement(LLVMContext *Ctx, - mcdxbc::RootSignatureDesc &RSD, - MDNode *Element); - bool parseStaticSampler(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD, - MDNode *StaticSamplerNode); - - bool validateRootSignature(LLVMContext *Ctx, - const llvm::mcdxbc::RootSignatureDesc &RSD); + llvm::Error parseRootFlags(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootFlagNode); + llvm::Error parseRootConstants(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootConstantNode); + llvm::Error parseRootDescriptors(mcdxbc::RootSignatureDesc &RSD, + MDNode *RootDescriptorNode, + RootSignatureElementKind ElementKind); + llvm::Error parseDescriptorRange(mcdxbc::DescriptorTable &Table, + MDNode *RangeDescriptorNode); + llvm::Error parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD, + MDNode *DescriptorTableNode); + llvm::Error parseRootSignatureElement(mcdxbc::RootSignatureDesc &RSD, + MDNode *Element); + llvm::Error parseStaticSampler(mcdxbc::RootSignatureDesc &RSD, + MDNode *StaticSamplerNode); + + llvm::Error validateRootSignature(const llvm::mcdxbc::RootSignatureDesc &RSD); MDNode *Root; }; diff --git a/llvm/include/llvm/Frontend/Offloading/PropertySet.h b/llvm/include/llvm/Frontend/Offloading/PropertySet.h new file mode 100644 index 0000000..d198d3e --- /dev/null +++ b/llvm/include/llvm/Frontend/Offloading/PropertySet.h @@ -0,0 +1,33 @@ +///===- llvm/Frontend/Offloading/PropertySet.h ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +///===---------------------------------------------------------------------===// +/// \file This file defines PropertySetRegistry and PropertyValue types and +/// provides helper functions to translate PropertySetRegistry from/to JSON. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Error.h" + +#include <map> +#include <variant> + +namespace llvm { +class raw_ostream; +class MemoryBufferRef; + +namespace offloading { + +using ByteArray = SmallVector<unsigned char, 0>; +using PropertyValue = std::variant<uint32_t, ByteArray>; +using PropertySet = std::map<std::string, PropertyValue>; +using PropertySetRegistry = std::map<std::string, PropertySet>; + +void writePropertiesToJSON(const PropertySetRegistry &P, raw_ostream &O); +Expected<PropertySetRegistry> readPropertiesFromJSON(MemoryBufferRef Buf); + +} // namespace offloading +} // namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 7919f7a..ce1cedc 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -578,8 +578,9 @@ struct DynamicAllocatorsT { template <typename T, typename I, typename E> // struct EnterT { using List = ObjectListT<I, E>; - using WrapperTrait = std::true_type; - List v; + ENUM(Modifier, Automap); + using TupleTrait = std::true_type; + std::tuple<OPT(Modifier), List> t; }; // V5.2: [5.6.2] `exclusive` clause diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 1b94657..79f25bb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -179,7 +179,7 @@ def OMPC_DynamicAllocators : Clause<[Spelling<"dynamic_allocators">]> { let clangClass = "OMPDynamicAllocatorsClause"; } def OMPC_Enter : Clause<[Spelling<"enter">]> { - let flangClass = "OmpObjectList"; + let flangClass = "OmpEnterClause"; } def OMPC_Exclusive : Clause<[Spelling<"exclusive">]> { let clangClass = "OMPExclusiveClause"; @@ -1460,7 +1460,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> { ]; let allowedOnceClauses = [ VersionedClause<OMPC_Collapse>, - VersionedClause<OMPC_If>, + VersionedClause<OMPC_If, 50>, VersionedClause<OMPC_NoWait>, VersionedClause<OMPC_Order, 50>, VersionedClause<OMPC_Ordered>, diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 0318427..2e13896 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -996,14 +996,6 @@ public: return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); } - /// FIXME: Remove this function once transition to Align is over. - /// Use getDestAlign() instead. - LLVM_DEPRECATED("Use getDestAlign() instead", "getDestAlign") - unsigned getDestAlignment() const { - if (auto MA = getParamAlign(ARG_DEST)) - return MA->value(); - return 0; - } MaybeAlign getDestAlign() const { return getParamAlign(ARG_DEST); } /// Set the specified arguments of the instruction. @@ -1057,15 +1049,6 @@ public: return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); } - /// FIXME: Remove this function once transition to Align is over. - /// Use getSourceAlign() instead. 
- LLVM_DEPRECATED("Use getSourceAlign() instead", "getSourceAlign") - unsigned getSourceAlignment() const { - if (auto MA = BaseCL::getParamAlign(ARG_SOURCE)) - return MA->value(); - return 0; - } - MaybeAlign getSourceAlign() const { return BaseCL::getParamAlign(ARG_SOURCE); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 1da4e36..469bdb4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -593,6 +593,14 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">; + +def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic< + [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">; + def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; @@ -601,32 +609,108 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; -class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pk_fp8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">; + +def int_amdgcn_cvt_pk_bf8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">; + +// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. +def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">; + +// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. 
+def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; + +// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..7] +class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; -class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; +def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">; +def int_amdgcn_cvt_scale_pk8_bf16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_fp8">; +def int_amdgcn_cvt_scale_pk8_f16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_bf8">; +def int_amdgcn_cvt_scale_pk8_bf16_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_v2i32_ty, "cvt_scale_pk8_bf16_bf8">; +def int_amdgcn_cvt_scale_pk8_f16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_i32_ty, "cvt_scale_pk8_f16_fp4">; +def int_amdgcn_cvt_scale_pk8_bf16_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty, llvm_i32_ty, "cvt_scale_pk8_bf16_fp4">; +def int_amdgcn_cvt_scale_pk8_f32_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_fp8">; +def int_amdgcn_cvt_scale_pk8_f32_bf8 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_v2i32_ty, "cvt_scale_pk8_f32_bf8">; +def int_amdgcn_cvt_scale_pk8_f32_fp4 : AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty, llvm_i32_ty, "cvt_scale_pk8_f32_fp4">; +def int_amdgcn_cvt_scale_pk16_f16_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty, llvm_v3i32_ty, "cvt_scale_pk16_f16_bf6">; +def int_amdgcn_cvt_scale_pk16_bf16_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, "cvt_scale_pk16_bf16_bf6">; +def int_amdgcn_cvt_scale_pk16_f16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty, llvm_v3i32_ty, "cvt_scale_pk16_f16_fp6">; +def int_amdgcn_cvt_scale_pk16_bf16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, "cvt_scale_pk16_bf16_fp6">; +def int_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_fp6">; +def int_amdgcn_cvt_scale_pk16_f32_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_bf6">; + +class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_"#name>; + def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">; def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_bf6_f16">; +def 
int_amdgcn_cvt_scalef32_pk8_fp8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp8_bf16">; +def int_amdgcn_cvt_scalef32_pk8_bf8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_bf8_bf16">; def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">; def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_bf6_bf16">; +def int_amdgcn_cvt_scalef32_pk8_fp8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp8_f16">; +def int_amdgcn_cvt_scalef32_pk8_bf8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_bf8_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp8_f32">; +def int_amdgcn_cvt_scalef32_pk8_bf8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_bf8_f32">; +def int_amdgcn_cvt_scalef32_pk8_fp4_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp4_f32">; +def int_amdgcn_cvt_scalef32_pk8_fp4_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp4_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp4_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp4_bf16">; +def int_amdgcn_cvt_scalef32_pk16_fp6_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_pk16_fp6_f32">; +def int_amdgcn_cvt_scalef32_pk16_bf6_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_pk16_bf6_f32">; +def int_amdgcn_cvt_scalef32_pk16_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_pk16_fp6_f16">; +def int_amdgcn_cvt_scalef32_pk16_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_pk16_bf6_f16">; +def int_amdgcn_cvt_scalef32_pk16_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_pk16_fp6_bf16">; +def int_amdgcn_cvt_scalef32_pk16_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_pk16_bf6_bf16">; + +def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_fp8_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_bf8_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp8_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_fp8_f16">; +def int_amdgcn_cvt_scalef32_sr_pk8_bf8_f16 : 
AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_bf8_f16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp8_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_fp8_f32">; +def int_amdgcn_cvt_scalef32_sr_pk8_bf8_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_bf8_f32">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp4_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_sr_pk8_fp4_f32">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp4_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_sr_pk8_fp4_f16">; +def int_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_sr_pk8_fp4_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk16_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_sr_pk16_fp6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk16_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f32_ty, "cvt_scalef32_sr_pk16_bf6_f32">; +def int_amdgcn_cvt_scalef32_sr_pk16_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_sr_pk16_fp6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk16_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16f16_ty, "cvt_scalef32_sr_pk16_bf6_f16">; +def int_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_sr_pk16_fp6_bf16">; +def int_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v3i32_ty, llvm_v16bf16_ty, "cvt_scalef32_sr_pk16_bf6_bf16">; + def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">; def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">; -def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">; -def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">; -def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">; -def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">; -def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">; -def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">; - class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic< [DstTy], [llvm_i32_ty, // src @@ -3473,6 +3557,12 @@ def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel +def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; + // llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] // byte_sel selects byte to 
write into vdst. def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, @@ -3486,6 +3576,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.sr.fp8.f32.e5m3 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32_e5m3">, + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; + // llvm.amdgcn.cvt.off.fp32.i4 int srcA def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">, DefaultAttrsIntrinsic<[llvm_float_ty], @@ -3597,6 +3693,48 @@ def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">, def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; +// llvm.amdgcn.permlane.bcast <src0> <src1> <src2> +def int_amdgcn_permlane_bcast : ClangBuiltin<"__builtin_amdgcn_permlane_bcast">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.up <src0> <src1> <src2> +def int_amdgcn_permlane_up : ClangBuiltin<"__builtin_amdgcn_permlane_up">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.down <src0> <src1> <src2> +def int_amdgcn_permlane_down : ClangBuiltin<"__builtin_amdgcn_permlane_down">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.xor <src0> <src1> <src2> +def int_amdgcn_permlane_xor : ClangBuiltin<"__builtin_amdgcn_permlane_xor">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.idx.gen <src0> <src1> +def int_amdgcn_permlane_idx_gen : ClangBuiltin<"__builtin_amdgcn_permlane_idx_gen">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +def int_amdgcn_perm_pk16_b4_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b4_u4">, + DefaultAttrsIntrinsic<[llvm_v2i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_amdgcn_perm_pk16_b6_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b6_u4">, + DefaultAttrsIntrinsic<[llvm_v3i32_ty], [llvm_i32_ty, llvm_i64_ty, llvm_v2i32_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. 
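A note on the scaled-conversion intrinsics added earlier in this file's diff: they are non-overloaded, so a pass or frontend can emit them directly by ID. Below is a minimal sketch, assuming the standard IRBuilder::CreateIntrinsic API; the helper name and operand choices are illustrative, and scale_sel must be a compile-time constant because of the ImmArg<ArgIndex<2>> attribute.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include <cassert>

using namespace llvm;

// Illustrative helper: convert eight packed FP8 values (held in a v2i32)
// to v8f16 via llvm.amdgcn.cvt.scale.pk8.f16.fp8. Src is v2i32, Scale is
// i32, and ScaleSel selects the scale byte (immediate in [0..7]).
static Value *emitCvtScalePk8F16Fp8(IRBuilder<> &B, Value *Src, Value *Scale,
                                    unsigned ScaleSel) {
  assert(ScaleSel <= 7 && "scale_sel is an immediate in [0..7]");
  return B.CreateIntrinsic(Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8,
                           /*Types=*/{}, {Src, Scale, B.getInt32(ScaleSel)});
}

The ImmArg attribute is what lets the IR verifier reject a non-constant scale_sel, matching the "imm scale_sel [0..7]" contract stated in the TableGen comment above.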
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index e63a41f..99f975f 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1717,6 +1717,16 @@ let TargetPrefix = "riscv" in { llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (pointer, offset, mask, vl) + def int_riscv_sseg # nf # _load_mask + : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [llvm_anyptr_ty, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (<stored values>..., pointer, mask, vl) def int_riscv_seg # nf # _store_mask : DefaultAttrsIntrinsic<[], diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index d8e632b..323c478 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -542,21 +542,23 @@ private: ArrayRef<SymbolResolution> Res, unsigned Partition, bool InSummary); - // These functions take a range of symbol resolutions [ResI, ResE) and consume - // the resolutions used by a single input module by incrementing ResI. After - // these functions return, [ResI, ResE) will refer to the resolution range for - // the remaining modules in the InputFile. - Error addModule(InputFile &Input, unsigned ModI, - const SymbolResolution *&ResI, const SymbolResolution *ResE); - - Expected<RegularLTOState::AddedModule> - addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + // These functions take a range of symbol resolutions and consume the + // resolutions used by a single input module. They return ranges referring + // to the resolutions for the remaining modules in the InputFile. + Expected<ArrayRef<SymbolResolution>> + addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes, + unsigned ModI, ArrayRef<SymbolResolution> Res); + + Expected<std::pair<RegularLTOState::AddedModule, ArrayRef<SymbolResolution>>> + addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes, + BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res); Error linkRegularLTO(RegularLTOState::AddedModule Mod, bool LivenessFromIndex); - Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, const SymbolResolution *ResE); + Expected<ArrayRef<SymbolResolution>> + addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res); Error runRegularLTO(AddStreamFn AddStream); Error runThinLTO(AddStreamFn AddStream, FileCache Cache, diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index bfc1175..311d9ca 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -117,14 +117,13 @@ public: void maybeAddReloc(const MCFragment &, const MCFixup &, const MCValue &, uint64_t &Value, bool IsResolved); - /// Determine if a relocation is required. In addition, - /// Apply the \p Value for given \p Fixup into the provided data fragment, at - /// the offset specified by the fixup and following the fixup kind as - /// appropriate. Errors (such as an out of range fixup value) should be - /// reported via \p Ctx. + // Determine if a relocation is required. In addition, apply `Value` to the + // `Data` fragment at the specified fixup offset if applicable.
`Data` points + // to the first byte of the fixup offset, which may be at the content's end if + // the fixup is zero-sized. virtual void applyFixup(const MCFragment &, const MCFixup &, - const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsResolved) = 0; + const MCValue &Target, uint8_t *Data, uint64_t Value, + bool IsResolved) = 0; /// @} diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index 4853701..ffde5ca2 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -99,8 +99,7 @@ private: /// \param RecordReloc Record relocation if needed. /// relocation. bool evaluateFixup(const MCFragment &F, MCFixup &Fixup, MCValue &Target, - uint64_t &Value, bool RecordReloc, - MutableArrayRef<char> Contents) const; + uint64_t &Value, bool RecordReloc, uint8_t *Data) const; /// Check whether a fixup can be satisfied, or whether it needs to be relaxed /// (increased in size, in order to hold its value correctly). diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 5ce58ae..d69560c 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -69,6 +69,9 @@ protected: /// Language Specific Data Area information is emitted to. MCSection *LSDASection = nullptr; + /// Section containing call graph metadata. + MCSection *CallGraphSection = nullptr; + /// If exception handling is supported by the target and the target can /// support a compact representation of the CIE and FDE, this is the section /// to emit them into. @@ -359,6 +362,8 @@ public: MCSection *getFaultMapSection() const { return FaultMapSection; } MCSection *getRemarksSection() const { return RemarksSection; } + MCSection *getCallGraphSection(const MCSection &TextSec) const; + MCSection *getStackSizesSection(const MCSection &TextSec) const; MCSection *getBBAddrMapSection(const MCSection &TextSec) const; diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index 5ac7aba..eb875a7 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -52,7 +52,7 @@ class MCObjectStreamer : public MCStreamer { DenseMap<const MCSymbol *, SmallVector<PendingAssignment, 1>> pendingAssignments; - SmallVector<std::unique_ptr<char[]>, 0> FragStorage; + SmallVector<std::unique_ptr<uint8_t[]>, 0> FragStorage; // Available bytes in the current block for trailing data or new fragments. size_t FragSpace = 0; @@ -88,8 +88,8 @@ public: // Add a fragment with a variable-size tail and start a new empty fragment. void insert(MCFragment *F); - char *getCurFragEnd() const { - return reinterpret_cast<char *>(CurFrag + 1) + CurFrag->getFixedSize(); + uint8_t *getCurFragEnd() const { + return reinterpret_cast<uint8_t *>(CurFrag + 1) + CurFrag->getFixedSize(); } MCFragment *allocFragSpace(size_t Headroom); // Add a new fragment to the current section without a variable-size tail. @@ -97,7 +97,7 @@ public: void ensureHeadroom(size_t Headroom); void appendContents(ArrayRef<char> Contents); - void appendContents(size_t Num, char Elt); + void appendContents(size_t Num, uint8_t Elt); // Add a fixup to the current fragment. Call ensureHeadroom beforehand to // ensure the fixup and appended content apply to the same fragment. 
void addFixup(const MCExpr *Value, MCFixupKind Kind); diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 2e929d8..4022ea7 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -80,19 +80,22 @@ private: FragmentType Kind; -protected: + //== Used by certain fragment types for better packing. + + // The number of fixups for the optional variable-size tail must be small. + uint8_t VarFixupSize = 0; + bool LinkerRelaxable : 1; - /// Used by certain fragment types for better packing. - /// /// FT_Data, FT_Relaxable bool HasInstructions : 1; /// FT_Relaxable, x86-specific bool AllowAutoPadding : 1; // Track content and fixups for the fixed-size part as fragments are - // appended to the section. The content remains immutable, except when - // modified by applyFixup. + // appended to the section. The content is stored as trailing data of the + // MCFragment. The content remains immutable, except when modified by + // applyFixup. uint32_t FixedSize = 0; uint32_t FixupStart = 0; uint32_t FixupEnd = 0; @@ -102,7 +105,6 @@ protected: uint32_t VarContentStart = 0; uint32_t VarContentEnd = 0; uint32_t VarFixupStart = 0; - uint32_t VarFixupEnd = 0; const MCSubtargetInfo *STI = nullptr; @@ -296,13 +298,8 @@ public: } }; -/// Interface implemented by fragments that contain encoded instructions and/or -/// data. -class MCEncodedFragment : public MCFragment { -protected: - MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions) - : MCFragment(FType, HasInstructions) {} -}; +// MCFragment subclasses do not use the fixed-size part or variable-size tail of +// MCFragment. Instead, they encode content in a specialized way. class MCFillFragment : public MCFragment { uint8_t ValueSize; @@ -318,7 +315,7 @@ class MCFillFragment : public MCFragment { public: MCFillFragment(uint64_t Value, uint8_t VSize, const MCExpr &NumValues, SMLoc Loc) - : MCFragment(FT_Fill, false), ValueSize(VSize), Value(Value), + : MCFragment(FT_Fill), ValueSize(VSize), Value(Value), NumValues(NumValues), Loc(Loc) {} uint64_t getValue() const { return Value; } @@ -349,7 +346,7 @@ class MCNopsFragment : public MCFragment { public: MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L, const MCSubtargetInfo &STI) - : MCFragment(FT_Nops, false), Size(NumBytes), + : MCFragment(FT_Nops), Size(NumBytes), ControlledNopLength(ControlledNopLength), Loc(L), STI(STI) {} int64_t getNumBytes() const { return Size; } @@ -376,7 +373,7 @@ class MCOrgFragment : public MCFragment { public: MCOrgFragment(const MCExpr &Offset, int8_t Value, SMLoc Loc) - : MCFragment(FT_Org, false), Value(Value), Offset(&Offset), Loc(Loc) {} + : MCFragment(FT_Org), Value(Value), Offset(&Offset), Loc(Loc) {} const MCExpr &getOffset() const { return *Offset; } @@ -394,8 +391,7 @@ class MCSymbolIdFragment : public MCFragment { const MCSymbol *Sym; public: - MCSymbolIdFragment(const MCSymbol *Sym) - : MCFragment(FT_SymbolId, false), Sym(Sym) {} + MCSymbolIdFragment(const MCSymbol *Sym) : MCFragment(FT_SymbolId), Sym(Sym) {} const MCSymbol *getSymbol() { return Sym; } const MCSymbol *getSymbol() const { return Sym; } @@ -407,7 +403,7 @@ public: /// Fragment representing the binary annotations produced by the /// .cv_inline_linetable directive. 
-class MCCVInlineLineTableFragment : public MCEncodedFragment { +class MCCVInlineLineTableFragment : public MCFragment { unsigned SiteFuncId; unsigned StartFileId; unsigned StartLineNum; @@ -422,7 +418,7 @@ public: MCCVInlineLineTableFragment(unsigned SiteFuncId, unsigned StartFileId, unsigned StartLineNum, const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) - : MCEncodedFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId), + : MCFragment(FT_CVInlineLines), SiteFuncId(SiteFuncId), StartFileId(StartFileId), StartLineNum(StartLineNum), FnStartSym(FnStartSym), FnEndSym(FnEndSym) {} @@ -435,7 +431,7 @@ public: }; /// Fragment representing the .cv_def_range directive. -class MCCVDefRangeFragment : public MCEncodedFragment { +class MCCVDefRangeFragment : public MCFragment { ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges; StringRef FixedSizePortion; @@ -447,8 +443,7 @@ public: MCCVDefRangeFragment( ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges, StringRef FixedSizePortion) - : MCEncodedFragment(FT_CVDefRange, false), - Ranges(Ranges.begin(), Ranges.end()), + : MCFragment(FT_CVDefRange), Ranges(Ranges.begin(), Ranges.end()), FixedSizePortion(FixedSizePortion) {} ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> getRanges() const { @@ -479,8 +474,7 @@ class MCBoundaryAlignFragment : public MCFragment { public: MCBoundaryAlignFragment(Align AlignBoundary, const MCSubtargetInfo &STI) - : MCFragment(FT_BoundaryAlign, false), AlignBoundary(AlignBoundary), - STI(STI) {} + : MCFragment(FT_BoundaryAlign), AlignBoundary(AlignBoundary), STI(STI) {} uint64_t getSize() const { return Size; } void setSize(uint64_t Value) { Size = Value; } @@ -650,11 +644,10 @@ inline ArrayRef<MCFixup> MCFragment::getFixups() const { inline MutableArrayRef<MCFixup> MCFragment::getVarFixups() { return MutableArrayRef(getParent()->FixupStorage) - .slice(VarFixupStart, VarFixupEnd - VarFixupStart); + .slice(VarFixupStart, VarFixupSize); } inline ArrayRef<MCFixup> MCFragment::getVarFixups() const { - return ArrayRef(getParent()->FixupStorage) - .slice(VarFixupStart, VarFixupEnd - VarFixupStart); + return ArrayRef(getParent()->FixupStorage).slice(VarFixupStart, VarFixupSize); } //== FT_Relaxable functions diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index e883f2f..3bf8c29 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -117,7 +117,7 @@ struct FileHeader { llvm::yaml::Hex8 ABIVersion; ELF_ET Type; std::optional<ELF_EM> Machine; - ELF_EF Flags; + std::optional<ELF_EF> Flags; llvm::yaml::Hex64 Entry; std::optional<StringRef> SectionHeaderStringTable; diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index 3f785bd..26baddd 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -33,11 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 4ULL +#define MEMPROF_RAW_VERSION 5ULL // Currently supported versions. 
-#define MEMPROF_RAW_SUPPORTED_VERSIONS \ - { 3ULL, 4ULL } +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) { } __attribute__((__packed__)); #endif +constexpr int MantissaBits = 12; +constexpr int ExponentBits = 4; +constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1; +constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1; +constexpr uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa) + << MaxExponent; + +// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format. +inline uint16_t encodeHistogramCount(uint64_t Count) { + if (Count == 0) + return 0; + + if (Count > MaxRepresentableValue) + Count = MaxRepresentableValue; + + if (Count <= MaxMantissa) + return Count; + + uint64_t M = Count; + uint16_t E = 0; + while (M > MaxMantissa) { + M = (M + 1) >> 1; + E++; + } + return (E << MantissaBits) | static_cast<uint16_t>(M); +} + +// Decodes a 16-bit scaled integer and returns the +// decoded 64-bit unsigned integer. +inline uint64_t decodeHistogramCount(uint16_t EncodedValue) { + const uint16_t E = EncodedValue >> MantissaBits; + const uint16_t M = EncodedValue & MaxMantissa; + return static_cast<uint64_t>(M) << E; +} + } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index 8fca2d5..a331295 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -61,8 +61,10 @@ namespace llvm { for (bool _c = \ (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ _c; _c = false) \ + for (::llvm::impl::RAIINewLineStream NewLineStream{(STREAM)}; _c; \ + _c = false) \ ::llvm::impl::raw_ldbg_ostream{ \ - ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)} \ + ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), NewLineStream} \ .asLvalue() #define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ @@ -81,14 +83,15 @@ namespace llvm { namespace impl { -/// A raw_ostream that tracks `\n` and print the prefix. +/// A raw_ostream that tracks `\n` and prints the prefix after each +/// newline. class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; - bool HasPendingNewline = true; + bool HasPendingNewline; - /// Split the line on newlines and insert the prefix before each newline. - /// Forward everything to the underlying stream. + /// Split the line on newlines and insert the prefix before each + /// newline. Forward everything to the underlying stream. void write_impl(const char *Ptr, size_t Size) final { auto Str = StringRef(Ptr, Size); // Handle the initial prefix.
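Back on the MemProfData.inc change above: the version 5 raw format packs each histogram count into 16 bits as a 4-bit exponent plus a 12-bit mantissa. Counts up to 4095 round-trip exactly; larger counts are rounded upward (each halving step computes (M + 1) >> 1) and saturate at 4095 << 15. A small self-checking sketch of the round-trip behavior, assuming the .inc (which is shared with compiler-rt) can be included into a translation unit that already has <cstdint>:

#include <cstdint>
#include "llvm/ProfileData/MemProfData.inc"
#include <cassert>

int main() {
  using namespace llvm::memprof;
  // Values that fit in the 12-bit mantissa are exact.
  assert(decodeHistogramCount(encodeHistogramCount(4095)) == 4095);
  // 4096 is still exact: mantissa 2048, exponent 1.
  assert(decodeHistogramCount(encodeHistogramCount(4096)) == 4096);
  // 10001 rounds up to 2501 << 2 == 10004.
  assert(decodeHistogramCount(encodeHistogramCount(10001)) == 10004);
  // Everything above MaxRepresentableValue saturates at 4095 << 15.
  assert(decodeHistogramCount(encodeHistogramCount(~0ULL)) == (4095ULL << 15));
  return 0;
}

Because every halving step rounds up, the decoded value is always greater than or equal to the original count (until saturation kicks in), so the compressed histogram never under-reports a bucket.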
@@ -109,22 +112,18 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { } void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); } void writeWithPrefix(StringRef Str) { - if (HasPendingNewline) { - emitPrefix(); - HasPendingNewline = false; - } + flushEol(); Os.write(Str.data(), Str.size()); } public: - explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os) - : Prefix(std::move(Prefix)), Os(Os) { + explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, + bool HasPendingNewline = true) + : Prefix(std::move(Prefix)), Os(Os), + HasPendingNewline(HasPendingNewline) { SetUnbuffered(); } - ~raw_ldbg_ostream() final { - flushEol(); - Os << '\n'; - } + ~raw_ldbg_ostream() final { flushEol(); } void flushEol() { if (HasPendingNewline) { emitPrefix(); @@ -135,10 +134,22 @@ public: /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } - /// Some of the `<<` operators expect an lvalue, so we trick the type system. + /// Some of the `<<` operators expect an lvalue, so we trick the type + /// system. raw_ldbg_ostream &asLvalue() { return *this; } }; +/// A raw_ostream that prints a newline on destruction, useful for LDBG() +class RAIINewLineStream final : public raw_ostream { + raw_ostream &Os; + +public: + RAIINewLineStream(raw_ostream &Os) : Os(Os) { SetUnbuffered(); } + ~RAIINewLineStream() { Os << '\n'; } + void write_impl(const char *Ptr, size_t Size) final { Os.write(Ptr, Size); } + uint64_t current_pos() const final { return Os.tell(); } +}; + /// Remove the path prefix from the file name. static LLVM_ATTRIBUTE_UNUSED constexpr const char * getShortFileName(const char *path) { diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 734b795..d976773 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -1069,7 +1069,7 @@ public: /// Redirect each of the remapped files from first to second. static std::unique_ptr<RedirectingFileSystem> create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, - bool UseExternalNames, FileSystem &ExternalFS); + bool UseExternalNames, IntrusiveRefCntPtr<FileSystem> ExternalFS); ErrorOr<Status> status(const Twine &Path) override; bool exists(const Twine &Path) override; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 719c0ee..e57032a 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6494,7 +6494,7 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { } constexpr static const std::optional<TypeSize> HasNoAllocationSize = - std::optional<TypeSize>(TypeSize(-1, true)); + std::make_optional<TypeSize>(-1, true); LLVM_ABI static const char ID; }; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index cba3736..43ff084 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -400,19 +400,11 @@ public: /// Returns true if the loop has exactly one uncountable early exit, i.e. an /// uncountable exit that isn't the latch block. 
- bool hasUncountableEarlyExit() const { - return getUncountableEdge().has_value(); - } + bool hasUncountableEarlyExit() const { return UncountableExitingBB; } /// Returns the uncountable early exiting block, if there is exactly one. BasicBlock *getUncountableEarlyExitingBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->first : nullptr; - } - - /// Returns the destination of the uncountable early exiting block, if there - /// is exactly one. - BasicBlock *getUncountableEarlyExitBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr; + return UncountableExitingBB; } /// Return true if there is store-load forwarding dependencies. @@ -473,13 +465,6 @@ public: return CountableExitingBlocks; } - /// Returns the loop edge to an uncountable exit, or std::nullopt if there - /// isn't a single such edge. - std::optional<std::pair<BasicBlock *, BasicBlock *>> - getUncountableEdge() const { - return UncountableEdge; - } - private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal @@ -659,9 +644,9 @@ private: /// the exact backedge taken count is not computable. SmallVector<BasicBlock *, 4> CountableExitingBlocks; - /// Keep track of the loop edge to an uncountable exit, comprising a pair - /// of (Exiting, Exit) blocks, if there is exactly one early exit. - std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge; + /// Keep track of an uncountable exiting block, if there is exactly one early + /// exit. + BasicBlock *UncountableExitingBB = nullptr; }; } // namespace llvm
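Finally, to see how the binding logic factored out into HLSLBinding.h earlier in this patch is meant to be driven, here is a usage sketch of hlsl::BindingInfoBuilder. The function name and register values are illustrative, chosen to mirror the example in the header's doc comment:

#include "llvm/Frontend/HLSL/HLSLBinding.h"
#include <optional>

using namespace llvm;

// Track the explicit binding `RWBuffer<float> A[10] : register(u3)`, then
// ask for an implicit UAV slot in space 0.
hlsl::BindingInfo computeExampleBindings() {
  hlsl::BindingInfoBuilder Builder;
  Builder.trackBinding(dxil::ResourceClass::UAV, /*Space=*/0,
                       /*LowerBound=*/3, /*UpperBound=*/12,
                       /*Cookie=*/nullptr);
  bool HasOverlap = false;
  hlsl::BindingInfo Info = Builder.calculateBindingInfo(HasOverlap);
  // Free ranges in space 0 are now {0..2} and {13..~0u}, so a one-element
  // implicit binding should land at register 0.
  std::optional<uint32_t> Slot =
      Info.findAvailableBinding(dxil::ResourceClass::UAV, /*Space=*/0,
                                /*Size=*/1);
  (void)Slot;
  return Info;
}

For diagnostics, the callback overload of calculateBindingInfo is invoked once per overlapping binding, and findOverlapping() can then recover the conflicting range; the Cookie pointer exists so a caller can map a reported Binding back to its source construct.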