[𝘀𝗽𝗿] changes introduced through rebase

Created using spr 1.3.4 [skip ci]
author: Slava Zakharin <szakharin@nvidia.com> 2024-03-15 14:27:05 -0700
committer: Slava Zakharin <szakharin@nvidia.com> 2024-03-15 14:27:05 -0700
commit: 200c194c17b372e69b17bcec9a65c1bdea0965e1 (patch)
tree: f93a7811bb8abfa9be9d80fc89d6844bf6e6fed0
parent: d0a43d8ad55dcd075591bf9da3585a203acc4ad0 (diff)
parent: 71e0261fb0c6c382f68eedddf6bbcf637e6709f2 (diff)
download: llvm-200c194c17b372e69b17bcec9a65c1bdea0965e1.zip
llvm-200c194c17b372e69b17bcec9a65c1bdea0965e1.tar.gz
llvm-200c194c17b372e69b17bcec9a65c1bdea0965e1.tar.bz2
870 files changed, 27961 insertions, 7650 deletions
diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge
index 4ebf304..2e503c8 100755
--- a/.ci/generate-buildkite-pipeline-premerge
+++ b/.ci/generate-buildkite-pipeline-premerge
@@ -68,7 +68,7 @@ function compute-projects-to-test() {
       done
     ;;
     clang)
-      for p in clang-tools-extra compiler-rt flang libc lldb openmp cross-project-tests; do
+      for p in clang-tools-extra compiler-rt flang lldb cross-project-tests; do
         echo $p
       done
     ;;
@@ -224,7 +224,7 @@ fi
 # needs while letting them run on the infrastructure provided by LLVM.
 
 # Figure out which projects need to be built on each platform
-all_projects="bolt clang-tools-extra compiler-rt cross-project-tests flang libc libclc lld lldb llvm mlir openmp polly pstl"
+all_projects="bolt clang clang-tools-extra compiler-rt cross-project-tests flang libc libclc lld lldb llvm mlir openmp polly pstl"
 modified_projects="$(keep-modified-projects ${all_projects})"
 
 linux_projects_to_test=$(exclude-linux $(compute-projects-to-test ${modified_projects}))
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 8585084..561da6a 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -32,6 +32,9 @@
 /clang/www/cxx_dr_status.html @Endilll
 /clang/www/make_cxx_dr_status @Endilll
 
+clang/lib/AST/Interp/ @tbaederr
+clang/test/AST/Interp/ @tbaederr
+
 /lldb/ @JDevlieghere
 
 # MLIR Interfaces.
@@ -105,3 +108,6 @@
 
 # BOLT
 /bolt/ @aaupov @maksfb @rafaelauler @ayermolo @dcci
+
+# Bazel build system.
+/utils/bazel/ @rupprecht
diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml
index 9a580c6..a042833 100644
--- a/.github/new-prs-labeler.yml
+++ b/.github/new-prs-labeler.yml
@@ -629,6 +629,8 @@ backend:DirectX:
   - '**/*DirectX*/**'
   - '**/*DXIL*/**'
   - '**/*dxil*/**'
+  - '**/*DXContainer*'
+  - '**/*DXContainer*/**'
 
 backend:SPIR-V:
   - clang/lib/Driver/ToolChains/SPIRV.*
@@ -933,3 +935,6 @@ openmp:libomp:
 
 openmp:libomptarget:
   - any: ['openmp/**', '!openmp/runtime/**']
+
+bazel:
+  - utils/bazel/**
diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h
index 4debf93..0608481 100644
--- a/bolt/include/bolt/Core/DIEBuilder.h
+++ b/bolt/include/bolt/Core/DIEBuilder.h
@@ -209,7 +209,13 @@ private:
   void updateReferences();
 
   /// Update the Offset and Size of DIE, populate DebugNames table.
-  uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, uint32_t &CurOffset);
+  /// Along with current CU, and DIE being processed and the new DIE offset to
+  /// be updated, it takes in Parents vector that can be empty if this DIE has
+  /// no parents.
+  uint32_t
+  finalizeDIEs(DWARFUnit &CU, DIE &Die,
+               std::vector<std::optional<BOLTDWARF5AccelTableData *>> &Parents,
+               uint32_t &CurOffset);
 
   void registerUnit(DWARFUnit &DU, bool NeedSort);
 
diff --git a/bolt/include/bolt/Core/DebugNames.h b/bolt/include/bolt/Core/DebugNames.h
index 84c448a..1f17f1a 100644
--- a/bolt/include/bolt/Core/DebugNames.h
+++ b/bolt/include/bolt/Core/DebugNames.h
@@ -36,6 +36,9 @@ public:
   bool isTU() const { return DWARF5AccelTableData::isTU(); }
   std::optional<unsigned> getSecondUnitID() const { return SecondUnitID; }
 
+  void setPatchOffset(uint64_t PatchOffset) { OffsetVal = PatchOffset; }
+  uint64_t getPatchOffset() const { return std::get<uint64_t>(OffsetVal); }
+
 private:
   std::optional<unsigned> SecondUnitID;
 };
@@ -49,10 +52,12 @@ public:
       Abbrev->~DebugNamesAbbrev();
   }
   /// Add DWARF5 Accelerator table entry.
-  /// Input is DWARFUnit being processed, DIE that belongs to it, and potential
-  /// SkeletonCU if the Unit comes from a DWO section.
-  void addAccelTableEntry(DWARFUnit &Unit, const DIE &Die,
-                          const std::optional<uint64_t> &DWOID);
+  /// Input is DWARFUnit being processed, DIE that belongs to it, potential
+  /// DWOID if the Unit comes from a DWO section, and potential parent entry.
+  std::optional<BOLTDWARF5AccelTableData *>
+  addAccelTableEntry(DWARFUnit &Unit, const DIE &Die,
+                     const std::optional<uint64_t> &DWOID,
+                     std::optional<BOLTDWARF5AccelTableData *> &Parent);
   /// Set current unit being processed.
   void setCurrentUnit(DWARFUnit &Unit, const uint64_t UnitStartOffset);
   /// Emit Accelerator table.
@@ -121,6 +126,8 @@ private:
   llvm::DenseMap<llvm::hash_code, uint64_t> StrCacheToOffsetMap;
   // Contains DWO ID to CUList Index.
   llvm::DenseMap<uint64_t, uint32_t> CUOffsetsToPatch;
+  // Contains a map of Entry ID to Entry relative offset.
+  llvm::DenseMap<uint64_t, uint32_t> EntryRelativeOffsets;
   /// Adds Unit to either CUList, LocalTUList or ForeignTUList.
   /// Input Unit being processed, and DWO ID if Unit is being processed comes
   /// from a DWO section.
@@ -143,7 +150,7 @@ private:
   /// Write Entries.
   void writeEntries();
   /// Write an Entry.
-  void writeEntry(const BOLTDWARF5AccelTableData &Entry);
+  void writeEntry(BOLTDWARF5AccelTableData &Entry);
   /// Write augmentation_string for BOLT.
   void writeAugmentationString();
   /// Emit out Header for DWARF5 Accelerator table.
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 6bb76d1..96b58f5 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -487,10 +487,9 @@ public:
     llvm_unreachable("not implemented");
   }
 
-  virtual bool createDirectCall(MCInst &Inst, const MCSymbol *Target,
+  virtual void createDirectCall(MCInst &Inst, const MCSymbol *Target,
                                 MCContext *Ctx, bool IsTailCall) {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
@@ -1534,15 +1533,13 @@ public:
   }
 
   /// Create a no-op instruction.
-  virtual bool createNoop(MCInst &Inst) const {
+  virtual void createNoop(MCInst &Inst) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Create a return instruction.
-  virtual bool createReturn(MCInst &Inst) const {
+  virtual void createReturn(MCInst &Inst) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Store \p Target absolute address to \p RegName
@@ -1556,32 +1553,30 @@ public:
 
   /// Creates a new unconditional branch instruction in Inst and set its operand
   /// to TBB.
-  ///
-  /// Returns true on success.
-  virtual bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
+  virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
                                   MCContext *Ctx) const {
     llvm_unreachable("not implemented");
-    return false;
+  }
+
+  /// Create a version of unconditional jump that has the largest span for a
+  /// single instruction with direct target.
+  virtual void createLongUncondBranch(MCInst &Inst, const MCSymbol *Target,
+                                      MCContext *Ctx) const {
+    llvm_unreachable("not implemented");
   }
 
   /// Creates a new call instruction in Inst and sets its operand to
   /// Target.
-  ///
-  /// Returns true on success.
-  virtual bool createCall(MCInst &Inst, const MCSymbol *Target,
+  virtual void createCall(MCInst &Inst, const MCSymbol *Target,
                           MCContext *Ctx) {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Creates a new tail call instruction in Inst and sets its operand to
   /// Target.
-  ///
-  /// Returns true on success.
-  virtual bool createTailCall(MCInst &Inst, const MCSymbol *Target,
+  virtual void createTailCall(MCInst &Inst, const MCSymbol *Target,
                               MCContext *Ctx) {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   virtual void createLongTailCall(InstructionListType &Seq,
@@ -1590,43 +1585,36 @@ public:
   }
 
   /// Creates a trap instruction in Inst.
-  ///
-  /// Returns true on success.
-  virtual bool createTrap(MCInst &Inst) const {
+  virtual void createTrap(MCInst &Inst) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Creates an instruction to bump the stack pointer just like a call.
-  virtual bool createStackPointerIncrement(MCInst &Inst, int Size = 8,
+  virtual void createStackPointerIncrement(MCInst &Inst, int Size = 8,
                                            bool NoFlagsClobber = false) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Creates an instruction to move the stack pointer just like a ret.
-  virtual bool createStackPointerDecrement(MCInst &Inst, int Size = 8,
+  virtual void createStackPointerDecrement(MCInst &Inst, int Size = 8,
                                            bool NoFlagsClobber = false) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Create a store instruction using \p StackReg as the base register
   /// and \p Offset as the displacement.
-  virtual bool createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg,
+  virtual void createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg,
                                  int Offset, const MCPhysReg &SrcReg,
                                  int Size) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
-  virtual bool createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
+  virtual void createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
                           const MCPhysReg &IndexReg, int64_t Offset,
                           const MCExpr *OffsetExpr,
                           const MCPhysReg &AddrSegmentReg,
                           const MCPhysReg &DstReg, int Size) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   virtual InstructionListType createLoadImmediate(const MCPhysReg Dest,
@@ -1636,32 +1624,27 @@ public:
 
   /// Create a fragment of code (sequence of instructions) that load a 32-bit
   /// address from memory, zero-extends it to 64 and jump to it (indirect jump).
-  virtual bool
+  virtual void
   createIJmp32Frag(SmallVectorImpl<MCInst> &Insts, const MCOperand &BaseReg,
                    const MCOperand &Scale, const MCOperand &IndexReg,
                    const MCOperand &Offset, const MCOperand &TmpReg) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Create a load instruction using \p StackReg as the base register
   /// and \p Offset as the displacement.
-  virtual bool createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
+  virtual void createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
                                       int Offset, const MCPhysReg &DstReg,
                                       int Size) const {
     llvm_unreachable("not implemented");
-    return false;
   }
 
   /// Creates a call frame pseudo instruction. A single operand identifies which
   /// MCCFIInstruction this MCInst is referring to.
-  ///
-  /// Returns true on success.
-  virtual bool createCFI(MCInst &Inst, int64_t Offset) const {
+  virtual void createCFI(MCInst &Inst, int64_t Offset) const {
     Inst.clear();
     Inst.setOpcode(TargetOpcode::CFI_INSTRUCTION);
     Inst.addOperand(MCOperand::createImm(Offset));
-    return true;
   }
 
   /// Create an inline version of memcpy(dest, src, 1).
@@ -1699,6 +1682,12 @@ public:
     return Inst.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
   }
 
+  /// Create a conditional branch with a target-specific conditional code \p CC.
+  virtual void createCondBranch(MCInst &Inst, const MCSymbol *Target,
+                                unsigned CC, MCContext *Ctx) const {
+    llvm_unreachable("not implemented");
+  }
+
   /// Reverses the branch condition in Inst and update its taken target to TBB.
   ///
   /// Returns true on success.
diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp
index 4228797..0cf8a5e 100644
--- a/bolt/lib/Core/DIEBuilder.cpp
+++ b/bolt/lib/Core/DIEBuilder.cpp
@@ -377,20 +377,32 @@ getUnitForOffset(DIEBuilder &Builder, DWARFContext &DWCtx,
   return nullptr;
 }
 
-uint32_t DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
-                                  uint32_t &CurOffset) {
+uint32_t DIEBuilder::finalizeDIEs(
+    DWARFUnit &CU, DIE &Die,
+    std::vector<std::optional<BOLTDWARF5AccelTableData *>> &Parents,
+    uint32_t &CurOffset) {
   getState().DWARFDieAddressesParsed.erase(Die.getOffset());
   uint32_t CurSize = 0;
   Die.setOffset(CurOffset);
-  DebugNamesTable.addAccelTableEntry(
-      CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt);
+  std::optional<BOLTDWARF5AccelTableData *> NameEntry =
+      DebugNamesTable.addAccelTableEntry(
+          CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt,
+          Parents.back());
+  // It is possible that an indexed debugging information entry has a parent
+  // that is not indexed (for example, if its parent does not have a name
+  // attribute). In such a case, a parent attribute may point to a nameless
+  // index entry (that is, one that cannot be reached from any entry in the name
+  // table), or it may point to the nearest ancestor that does have an index
+  // entry.
+  if (NameEntry)
+    Parents.push_back(std::move(NameEntry));
   for (DIEValue &Val : Die.values())
     CurSize += Val.sizeOf(CU.getFormParams());
   CurSize += getULEB128Size(Die.getAbbrevNumber());
   CurOffset += CurSize;
 
   for (DIE &Child : Die.children()) {
-    uint32_t ChildSize = finalizeDIEs(CU, Child, CurOffset);
+    uint32_t ChildSize = finalizeDIEs(CU, Child, Parents, CurOffset);
     CurSize += ChildSize;
   }
   // for children end mark.
@@ -400,6 +412,8 @@ uint32_t DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
   }
 
   Die.setSize(CurSize);
+  if (NameEntry)
+    Parents.pop_back();
 
   return CurSize;
 }
@@ -410,7 +424,9 @@ void DIEBuilder::finish() {
     uint32_t HeaderSize = CU.getHeaderSize();
     uint32_t CurOffset = HeaderSize;
     DebugNamesTable.setCurrentUnit(CU, UnitStartOffset);
-    finalizeDIEs(CU, *UnitDIE, CurOffset);
+    std::vector<std::optional<BOLTDWARF5AccelTableData *>> Parents;
+    Parents.push_back(std::nullopt);
+    finalizeDIEs(CU, *UnitDIE, Parents, CurOffset);
 
     DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU);
     CurUnitInfo.UnitOffset = UnitStartOffset;
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp
index 384e636..4bcedbe 100644
--- a/bolt/lib/Core/DebugNames.cpp
+++ b/bolt/lib/Core/DebugNames.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/LEB128.h"
 #include <cstdint>
+#include <optional>
 
 namespace llvm {
 namespace bolt {
@@ -163,10 +164,16 @@ static uint64_t getNameOffset(BinaryContext &BC, DWARFUnit &Unit,
                                    Index * DwarfOffsetByteSize);
 }
 
-void DWARF5AcceleratorTable::addAccelTableEntry(
-    DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID) {
+static uint64_t getEntryID(const BOLTDWARF5AccelTableData &Entry) {
+  return reinterpret_cast<uint64_t>(&Entry);
+}
+
+std::optional<BOLTDWARF5AccelTableData *>
+DWARF5AcceleratorTable::addAccelTableEntry(
+    DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID,
+    std::optional<BOLTDWARF5AccelTableData *> &Parent) {
   if (Unit.getVersion() < 5 || !NeedToCreate)
-    return;
+    return std::nullopt;
   std::string NameToUse = "";
   auto canProcess = [&](const DIE &Die) -> bool {
     switch (Die.getTag()) {
@@ -217,7 +224,7 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
   };
 
   if (!canProcess(Die))
-    return;
+    return std::nullopt;
 
   // Addes a Unit to either CU, LocalTU or ForeignTU list the first time we
   // encounter it.
@@ -227,10 +234,11 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
     addUnit(Unit, DWOID);
   }
 
-  auto addEntry = [&](DIEValue ValName) -> void {
+  auto addEntry =
+      [&](DIEValue ValName) -> std::optional<BOLTDWARF5AccelTableData *> {
     if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) &&
         NameToUse.empty())
-      return;
+      return std::nullopt;
     std::string Name = "";
     uint64_t NameIndexOffset = 0;
     if (NameToUse.empty()) {
@@ -275,13 +283,23 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
                   << ".\n";
       SecondIndex = Iter->second;
     }
+    std::optional<uint64_t> ParentOffset =
+        (Parent ? std::optional<uint64_t>(getEntryID(**Parent)) : std::nullopt);
+    // This will be populated later in writeEntry.
+    // This way only parent entries get tracked.
+    // Keeping memory footprint down.
+    if (ParentOffset)
+      EntryRelativeOffsets.insert({*ParentOffset, 0});
     It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData(
-        Die.getOffset(), std::nullopt, DieTag, UnitID, IsTU, SecondIndex));
+        Die.getOffset(), ParentOffset, DieTag, UnitID, IsTU, SecondIndex));
+    return It.Values.back();
   };
 
-  addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
-  addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
-  return;
+  std::optional<BOLTDWARF5AccelTableData *> NameEntry =
+      addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
+  std::optional<BOLTDWARF5AccelTableData *> LinkageNameEntry =
+      addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
+  return NameEntry ? NameEntry : LinkageNameEntry;
 }
 
 /// Algorithm from llvm implementation.
@@ -382,6 +400,11 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
         if (SecondEntryRet)
           Abbrev.addAttribute(SecondEntryRet->Encoding);
         Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+        if (std::optional<uint64_t> Offset = Value->getParentDieOffset())
+          Abbrev.addAttribute({dwarf::DW_IDX_parent, dwarf::DW_FORM_ref4});
+        else
+          Abbrev.addAttribute(
+              {dwarf::DW_IDX_parent, dwarf::DW_FORM_flag_present});
         FoldingSetNodeID ID;
         Abbrev.Profile(ID);
         void *InsertPos;
@@ -401,7 +424,11 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
   }
 }
 
-void DWARF5AcceleratorTable::writeEntry(const BOLTDWARF5AccelTableData &Entry) {
+void DWARF5AcceleratorTable::writeEntry(BOLTDWARF5AccelTableData &Entry) {
+  const uint64_t EntryID = getEntryID(Entry);
+  if (EntryRelativeOffsets.find(EntryID) != EntryRelativeOffsets.end())
+    EntryRelativeOffsets[EntryID] = EntriesBuffer->size();
+
   const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
       getIndexForEntry(Entry);
   // For forgeign type (FTU) units that need to refer to the FTU and to the CU.
@@ -456,6 +483,17 @@ void DWARF5AcceleratorTable::writeEntry(const BOLTDWARF5AccelTableData &Entry) {
                              llvm::endianness::little);
       break;
     }
+    case dwarf::DW_IDX_parent: {
+      assert(
+          (AttrEnc.Form == dwarf::DW_FORM_ref4 && Entry.getParentDieOffset()) ||
+          AttrEnc.Form == dwarf::DW_FORM_flag_present);
+      if (std::optional<uint64_t> ParentOffset = Entry.getParentDieOffset()) {
+        Entry.setPatchOffset(EntriesBuffer->size());
+        support::endian::write(*Entriestream, static_cast<uint32_t>(UINT32_MAX),
+                               llvm::endianness::little);
+      }
+      break;
+    }
     }
   }
 }
@@ -464,13 +502,34 @@ void DWARF5AcceleratorTable::writeEntries() {
   for (auto &Bucket : getBuckets()) {
     for (DWARF5AcceleratorTable::HashData *Hash : Bucket) {
       Hash->EntryOffset = EntriesBuffer->size();
-      for (const BOLTDWARF5AccelTableData *Value : Hash->Values) {
+      for (BOLTDWARF5AccelTableData *Value : Hash->Values) {
         writeEntry(*Value);
       }
       support::endian::write(*Entriestream, static_cast<uint8_t>(0),
                              llvm::endianness::little);
     }
   }
+  // Patching parent offsets.
+  for (auto &Bucket : getBuckets()) {
+    for (DWARF5AcceleratorTable::HashData *Hash : Bucket) {
+      for (BOLTDWARF5AccelTableData *Entry : Hash->Values) {
+        std::optional<uint64_t> ParentOffset = Entry->getParentDieOffset();
+        if (!ParentOffset)
+          continue;
+        if (const auto Iter = EntryRelativeOffsets.find(*ParentOffset);
+            Iter != EntryRelativeOffsets.end()) {
+          const uint64_t PatchOffset = Entry->getPatchOffset();
+          uint32_t *Ptr = reinterpret_cast<uint32_t *>(
+              &EntriesBuffer.get()->data()[PatchOffset]);
+          *Ptr = Iter->second;
+        } else {
+          BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
+                       "entry with offset "
+                    << *ParentOffset << "\n";
+        }
+      }
+    }
+  }
 }
 
 void DWARF5AcceleratorTable::writeAugmentationString() {
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d2850b0..bf1c2dd 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1056,10 +1056,9 @@ void Peepholes::addTailcallTraps(BinaryFunction &Function) {
     MCInst *Inst = BB.getLastNonPseudoInstr();
     if (Inst && MIB->isTailCall(*Inst) && MIB->isIndirectBranch(*Inst)) {
       MCInst Trap;
-      if (MIB->createTrap(Trap)) {
-        BB.addInstruction(Trap);
-        ++TailCallTraps;
-      }
+      MIB->createTrap(Trap);
+      BB.addInstruction(Trap);
+      ++TailCallTraps;
     }
   }
 }
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp
index 9a1f9d7..c970650 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -680,16 +680,15 @@ void StackLayoutModifier::performChanges() {
       if (StackPtrReg != BC.MIB->getFramePointer())
         Adjustment = -Adjustment;
       if (IsLoad)
-        Success = BC.MIB->createRestoreFromStack(
-            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
+        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
+                                       StackOffset + Adjustment, Reg, Size);
       else if (IsStore)
-        Success = BC.MIB->createSaveToStack(
-            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
+        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
+                                  Reg, Size);
       LLVM_DEBUG({
         dbgs() << "Adjusted instruction: ";
         Inst.dump();
       });
-      assert(Success);
     }
   }
 }
@@ -1653,19 +1652,13 @@ Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
   if (SPVal != StackPointerTracking::SUPERPOSITION &&
       SPVal != StackPointerTracking::EMPTY) {
     if (FIE.IsLoad) {
-      if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
-                                          FIE.StackOffset - SPVal, FIE.RegOrImm,
-                                          FIE.Size)) {
-        return createFatalBOLTError(
-            "createRestoreFromStack: not supported on this platform\n");
-      }
-    } else {
-      if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
+      BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                      FIE.StackOffset - SPVal, FIE.RegOrImm,
-                                     FIE.Size)) {
-        return createFatalBOLTError(
-            "createSaveToStack: not supported on this platform\n");
-      }
+                                     FIE.Size);
+    } else {
+      BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
+                                FIE.StackOffset - SPVal, FIE.RegOrImm,
+                                FIE.Size);
     }
     if (CreatePushOrPop)
       BC.MIB->changeToPushOrPop(NewInst);
@@ -1675,19 +1668,12 @@ Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
          FPVal != StackPointerTracking::EMPTY);
 
   if (FIE.IsLoad) {
-    if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
-                                        FIE.StackOffset - FPVal, FIE.RegOrImm,
-                                        FIE.Size)) {
-      return createFatalBOLTError(
-          "createRestoreFromStack: not supported on this platform\n");
-    }
-  } else {
-    if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
+    BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                    FIE.StackOffset - FPVal, FIE.RegOrImm,
-                                   FIE.Size)) {
-      return createFatalBOLTError(
-          "createSaveToStack: not supported on this platform\n");
-    }
+                                   FIE.Size);
+  } else {
+    BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
+                              FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
   }
   return NewInst;
 }
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 631ccae..016962f 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -705,6 +705,9 @@ void assignProfile(BinaryFunction &BF,
 
 bool YAMLProfileReader::inferStaleProfile(
     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
+  if (!BF.hasCFG())
+    return false;
+
   LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                     << "\"" << BF.getPrintName() << "\"\n");
 
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9efb428..0ae9d36 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1026,7 +1026,7 @@ public:
     return Code;
   }
 
-  bool createTailCall(MCInst &Inst, const MCSymbol *Target,
+  void createTailCall(MCInst &Inst, const MCSymbol *Target,
                       MCContext *Ctx) override {
     return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
   }
@@ -1036,11 +1036,10 @@ public:
     createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
   }
 
-  bool createTrap(MCInst &Inst) const override {
+  void createTrap(MCInst &Inst) const override {
     Inst.clear();
     Inst.setOpcode(AArch64::BRK);
     Inst.addOperand(MCOperand::createImm(1));
-    return true;
   }
 
   bool convertJmpToTailCall(MCInst &Inst) override {
@@ -1068,16 +1067,15 @@ public:
            Inst.getOperand(0).getImm() == 0;
   }
 
-  bool createNoop(MCInst &Inst) const override {
+  void createNoop(MCInst &Inst) const override {
     Inst.setOpcode(AArch64::HINT);
     Inst.clear();
     Inst.addOperand(MCOperand::createImm(0));
-    return true;
   }
 
   bool mayStore(const MCInst &Inst) const override { return false; }
 
-  bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
+  void createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
                         bool IsTailCall) override {
     Inst.setOpcode(IsTailCall ? AArch64::B : AArch64::BL);
     Inst.clear();
@@ -1086,7 +1084,6 @@ public:
         *Ctx, 0)));
     if (IsTailCall)
       convertJmpToTailCall(Inst);
-    return true;
   }
 
   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
@@ -1293,14 +1290,13 @@ public:
     return true;
   }
 
-  bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
+  void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
                           MCContext *Ctx) const override {
     Inst.setOpcode(AArch64::B);
     Inst.clear();
     Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
         Inst, MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx),
         *Ctx, 0)));
-    return true;
   }
 
   bool shouldRecordCodeRelocation(uint64_t RelType) const override {
@@ -1353,14 +1349,13 @@ public:
     return StringRef("\0\0\0\0", 4);
   }
 
-  bool createReturn(MCInst &Inst) const override {
+  void createReturn(MCInst &Inst) const override {
     Inst.setOpcode(AArch64::RET);
     Inst.clear();
     Inst.addOperand(MCOperand::createReg(AArch64::LR));
-    return true;
   }
 
-  bool createStackPointerIncrement(
+  void createStackPointerIncrement(
       MCInst &Inst, int Size,
       bool NoFlagsClobber = false /*unused for AArch64*/) const override {
     Inst.setOpcode(AArch64::SUBXri);
@@ -1369,10 +1364,9 @@ public:
     Inst.addOperand(MCOperand::createReg(AArch64::SP));
     Inst.addOperand(MCOperand::createImm(Size));
     Inst.addOperand(MCOperand::createImm(0));
-    return true;
   }
 
-  bool createStackPointerDecrement(
+  void createStackPointerDecrement(
       MCInst &Inst, int Size,
       bool NoFlagsClobber = false /*unused for AArch64*/) const override {
     Inst.setOpcode(AArch64::ADDXri);
@@ -1381,12 +1375,12 @@ public:
     Inst.addOperand(MCOperand::createReg(AArch64::SP));
     Inst.addOperand(MCOperand::createImm(Size));
     Inst.addOperand(MCOperand::createImm(0));
-    return true;
   }
 
   void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg,
                             int64_t Disp) const {
     Inst.setOpcode(AArch64::BR);
+    Inst.clear();
     Inst.addOperand(MCOperand::createReg(MemBaseReg));
   }
 
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index e19c070..ab9623d 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -219,46 +219,43 @@ public:
     return true;
   }
 
-  bool createReturn(MCInst &Inst) const override {
+  void createReturn(MCInst &Inst) const override {
     // TODO "c.jr ra" when RVC is enabled
     Inst.setOpcode(RISCV::JALR);
     Inst.clear();
     Inst.addOperand(MCOperand::createReg(RISCV::X0));
     Inst.addOperand(MCOperand::createReg(RISCV::X1));
     Inst.addOperand(MCOperand::createImm(0));
-    return true;
   }
 
-  bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
+  void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
                           MCContext *Ctx) const override {
     Inst.setOpcode(RISCV::JAL);
     Inst.clear();
     Inst.addOperand(MCOperand::createReg(RISCV::X0));
     Inst.addOperand(MCOperand::createExpr(
         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)));
-    return true;
   }
 
   StringRef getTrapFillValue() const override {
     return StringRef("\0\0\0\0", 4);
   }
 
-  bool createCall(unsigned Opcode, MCInst &Inst, const MCSymbol *Target,
+  void createCall(unsigned Opcode, MCInst &Inst, const MCSymbol *Target,
                   MCContext *Ctx) {
     Inst.setOpcode(Opcode);
     Inst.clear();
     Inst.addOperand(MCOperand::createExpr(RISCVMCExpr::create(
         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
         RISCVMCExpr::VK_RISCV_CALL, *Ctx)));
-    return true;
   }
 
-  bool createCall(MCInst &Inst, const MCSymbol *Target,
+  void createCall(MCInst &Inst, const MCSymbol *Target,
                   MCContext *Ctx) override {
     return createCall(RISCV::PseudoCALL, Inst, Target, Ctx);
   }
 
-  bool createTailCall(MCInst &Inst, const MCSymbol *Target,
+  void createTailCall(MCInst &Inst, const MCSymbol *Target,
                       MCContext *Ctx) override {
     return createCall(RISCV::PseudoTAIL, Inst, Target, Ctx);
   }
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 387cf48..de55fbe 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2230,7 +2230,7 @@ public:
     return true;
   }
 
-  bool createStackPointerIncrement(MCInst &Inst, int Size,
+  void createStackPointerIncrement(MCInst &Inst, int Size,
                                    bool NoFlagsClobber) const override {
     if (NoFlagsClobber) {
       Inst.setOpcode(X86::LEA64r);
@@ -2241,17 +2241,16 @@ public:
       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
       Inst.addOperand(MCOperand::createImm(-Size));           // Displacement
       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
-      return true;
+      return;
     }
     Inst.setOpcode(X86::SUB64ri8);
     Inst.clear();
     Inst.addOperand(MCOperand::createReg(X86::RSP));
     Inst.addOperand(MCOperand::createReg(X86::RSP));
     Inst.addOperand(MCOperand::createImm(Size));
-    return true;
   }
 
-  bool createStackPointerDecrement(MCInst &Inst, int Size,
+  void createStackPointerDecrement(MCInst &Inst, int Size,
                                    bool NoFlagsClobber) const override {
     if (NoFlagsClobber) {
       Inst.setOpcode(X86::LEA64r);
@@ -2262,22 +2261,22 @@ public:
       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
       Inst.addOperand(MCOperand::createImm(Size));            // Displacement
       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
-      return true;
+      return;
     }
     Inst.setOpcode(X86::ADD64ri8);
     Inst.clear();
     Inst.addOperand(MCOperand::createReg(X86::RSP));
     Inst.addOperand(MCOperand::createReg(X86::RSP));
     Inst.addOperand(MCOperand::createImm(Size));
-    return true;
   }
 
-  bool createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg, int Offset,
+  void createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg, int Offset,
                          const MCPhysReg &SrcReg, int Size) const override {
     unsigned NewOpcode;
     switch (Size) {
     default:
-      return false;
+      llvm_unreachable("Invalid operand size");
+      return;
     case 2:      NewOpcode = X86::MOV16mr; break;
     case 4:      NewOpcode = X86::MOV32mr; break;
     case 8:      NewOpcode = X86::MOV64mr; break;
@@ -2290,10 +2289,9 @@ public:
     Inst.addOperand(MCOperand::createImm(Offset));          // Displacement
     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
     Inst.addOperand(MCOperand::createReg(SrcReg));
-    return true;
   }
 
-  bool createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
+  void createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
                               int Offset, const MCPhysReg &DstReg,
                               int Size) const override {
     return createLoad(Inst, StackReg, /*Scale=*/1, /*IndexReg=*/X86::NoRegister,
@@ -2301,14 +2299,15 @@ public:
                       DstReg, Size);
   }
 
-  bool createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
+  void createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
                   const MCPhysReg &IndexReg, int64_t Offset,
                   const MCExpr *OffsetExpr, const MCPhysReg &AddrSegmentReg,
                   const MCPhysReg &DstReg, int Size) const override {
     unsigned NewOpcode;
     switch (Size) {
     default:
-      return false;
+      llvm_unreachable("Invalid operand size");
+      return;
     case 2:      NewOpcode = X86::MOV16rm; break;
     case 4:      NewOpcode = X86::MOV32rm; break;
     case 8:      NewOpcode = X86::MOV64rm; break;
@@ -2324,7 +2323,6 @@ public:
     else
       Inst.addOperand(MCOperand::createImm(Offset)); // Displacement
     Inst.addOperand(MCOperand::createReg(AddrSegmentReg)); // AddrSegmentReg
-    return true;
   }
 
   InstructionListType createLoadImmediate(const MCPhysReg Dest,
@@ -2338,7 +2336,7 @@ public:
     return Insts;
   }
 
-  bool createIJmp32Frag(SmallVectorImpl<MCInst> &Insts,
+  void createIJmp32Frag(SmallVectorImpl<MCInst> &Insts,
                         const MCOperand &BaseReg, const MCOperand &Scale,
                         const MCOperand &IndexReg, const MCOperand &Offset,
                         const MCOperand &TmpReg) const override {
@@ -2362,17 +2360,16 @@ public:
 
     Insts.push_back(Load);
     Insts.push_back(IJmp);
-    return true;
   }
 
-  bool createNoop(MCInst &Inst) const override {
+  void createNoop(MCInst &Inst) const override {
     Inst.setOpcode(X86::NOOP);
-    return true;
+    Inst.clear();
   }
 
-  bool createReturn(MCInst &Inst) const override {
+  void createReturn(MCInst &Inst) const override {
     Inst.setOpcode(X86::RET64);
-    return true;
+    Inst.clear();
   }
 
   InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
@@ -2729,23 +2726,31 @@ public:
     return FoundOne;
   }
 
-  bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
+  void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
                           MCContext *Ctx) const override {
     Inst.setOpcode(X86::JMP_1);
+    Inst.clear();
     Inst.addOperand(MCOperand::createExpr(
         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)));
-    return true;
   }
 
-  bool createCall(MCInst &Inst, const MCSymbol *Target,
+  void createLongUncondBranch(MCInst &Inst, const MCSymbol *Target,
+                              MCContext *Ctx) const override {
+    Inst.setOpcode(X86::JMP_4);
+    Inst.clear();
+    Inst.addOperand(MCOperand::createExpr(
+        MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
+  }
+
+  void createCall(MCInst &Inst, const MCSymbol *Target,
                   MCContext *Ctx) override {
     Inst.setOpcode(X86::CALL64pcrel32);
+    Inst.clear();
     Inst.addOperand(MCOperand::createExpr(
         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
-    return true;
   }
 
-  bool createTailCall(MCInst &Inst, const MCSymbol *Target,
+  void createTailCall(MCInst &Inst, const MCSymbol *Target,
                       MCContext *Ctx) override {
     return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
   }
@@ -2757,10 +2762,18 @@ public:
     createDirectCall(Seq.back(), Target, Ctx, /*IsTailCall*/ true);
   }
 
-  bool createTrap(MCInst &Inst) const override {
+  void createTrap(MCInst &Inst) const override {
     Inst.clear();
     Inst.setOpcode(X86::TRAP);
-    return true;
+  }
+
+  void createCondBranch(MCInst &Inst, const MCSymbol *Target, unsigned CC,
+                        MCContext *Ctx) const override {
+    Inst.setOpcode(X86::JCC_1);
+    Inst.clear();
+    Inst.addOperand(MCOperand::createExpr(
+        MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
+    Inst.addOperand(MCOperand::createImm(CC));
   }
 
   bool reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
@@ -2862,7 +2875,7 @@ public:
     Inst.setOpcode(X86::LFENCE);
   }
 
-  bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
+  void createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
                         bool IsTailCall) override {
     Inst.clear();
     Inst.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32);
@@ -2870,7 +2883,6 @@ public:
         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
     if (IsTailCall)
       setTailCall(Inst);
-    return true;
   }
 
   void createShortJmp(InstructionListType &Seq, const MCSymbol *Target,
@@ -3066,6 +3078,7 @@ public:
   void createSwap(MCInst &Inst, MCPhysReg Source, MCPhysReg MemBaseReg,
                   int64_t Disp) const {
     Inst.setOpcode(X86::XCHG64rm);
+    Inst.clear();
     Inst.addOperand(MCOperand::createReg(Source));
     Inst.addOperand(MCOperand::createReg(Source));
     Inst.addOperand(MCOperand::createReg(MemBaseReg));      // BaseReg
@@ -3078,6 +3091,7 @@ public:
   void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg,
                             int64_t Disp) const {
     Inst.setOpcode(X86::JMP64m);
+    Inst.clear();
     Inst.addOperand(MCOperand::createReg(MemBaseReg));      // BaseReg
     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
@@ -3540,9 +3554,10 @@ public:
   }
 
 private:
-  bool createMove(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
+  void createMove(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
                   MCContext *Ctx) const {
     Inst.setOpcode(X86::MOV64rm);
+    Inst.clear();
     Inst.addOperand(MCOperand::createReg(Reg));
     Inst.addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
@@ -3551,13 +3566,12 @@ private:
         MCSymbolRefExpr::create(Src, MCSymbolRefExpr::VK_None,
                                 *Ctx)));                    // Displacement
     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
-
-    return true;
   }
 
-  bool createLea(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
+  void createLea(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
                  MCContext *Ctx) const {
     Inst.setOpcode(X86::LEA64r);
+    Inst.clear();
     Inst.addOperand(MCOperand::createReg(Reg));
     Inst.addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
@@ -3566,7 +3580,6 @@ private:
         MCSymbolRefExpr::create(Src, MCSymbolRefExpr::VK_None,
                                 *Ctx)));                    // Displacement
     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
-    return true;
   }
 };
 
diff --git a/bolt/test/X86/dwarf5-debug-names-generate-debug-names.test b/bolt/test/X86/dwarf5-debug-names-generate-debug-names.test
index b8789a6..ae921d1 100644
--- a/bolt/test/X86/dwarf5-debug-names-generate-debug-names.test
+++ b/bolt/test/X86/dwarf5-debug-names-generate-debug-names.test
@@ -14,7 +14,7 @@
 ; BOLT: [[OFFSET2:0x[0-9a-f]*]]: Compile Unit
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0x103
+; BOLT-NEXT:     Length: 0x109
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 2
@@ -22,7 +22,7 @@
 ; BOLT-NEXT:     Foreign TU count: 0
 ; BOLT-NEXT:     Bucket count: 8
 ; BOLT-NEXT:     Name count: 8
-; BOLT-NEXT:     Abbreviations table size: 0x19
+; BOLT-NEXT:     Abbreviations table size: 0x1F
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -34,16 +34,19 @@
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_variable
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
@@ -55,12 +58,14 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000033
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000007f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -73,6 +78,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000005e
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 3 {
@@ -83,6 +89,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000028
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -95,6 +102,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000028
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 5 {
@@ -105,6 +113,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000049
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 6 {
@@ -115,6 +124,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000028
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -127,6 +137,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000092
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -145,6 +156,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000005e
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-debug-names.test b/bolt/test/X86/dwarf5-debug-names.test
index de0e88d..8e45ef3 100644
--- a/bolt/test/X86/dwarf5-debug-names.test
+++ b/bolt/test/X86/dwarf5-debug-names.test
@@ -11,7 +11,7 @@
 ; BOLT: [[OFFSET2:0x[0-9a-f]*]]: Compile Unit
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0x1C2
+; BOLT-NEXT:     Length: 0x1DA
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 2
@@ -19,7 +19,7 @@
 ; BOLT-NEXT:     Foreign TU count: 0
 ; BOLT-NEXT:     Bucket count: 14
 ; BOLT-NEXT:     Name count: 15
-; BOLT-NEXT:     Abbreviations table size: 0x29
+; BOLT-NEXT:     Abbreviations table size: 0x3D
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -31,27 +31,38 @@
 ; BOLT-NEXT:       Tag: DW_TAG_structure_type
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_namespace
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV5:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_variable
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
+; BOLT-NEXT:       Abbreviation [[ABBREV6:0x[0-9a-f]*]] {
+; BOLT-NEXT:         Tag: DW_TAG_structure_type
+; BOLT-NEXT:         DW_IDX_compile_unit: DW_FORM_data1
+; BOLT-NEXT:         DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:         DW_IDX_parent: DW_FORM_ref4
+; BOLT-NEXT:       }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
 ; BOLT-NEXT:     Name 1 {
@@ -62,6 +73,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000002f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -74,6 +86,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x000000eb
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -81,17 +94,19 @@
 ; BOLT-NEXT:     Name 3 {
 ; BOLT-NEXT:       Hash: 0x8CFC710C
 ; BOLT-NEXT:       String: {{.+}} "(anonymous namespace)"
-; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:       Entry @ [[ENTRY:0x[0-9a-f]*]] {
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_namespace
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000061
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_namespace
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000061
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 4 {
@@ -102,6 +117,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000005a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -120,6 +136,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x000000c9
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 6 {
@@ -130,6 +147,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000033
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 7 {
@@ -140,12 +158,14 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000009f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV4]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x000000c5
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -158,6 +178,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000003f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 9 {
@@ -168,6 +189,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000024
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -180,6 +202,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000033
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -192,6 +215,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000024
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -207,12 +231,14 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000002f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV4]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000005d
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 13 {
@@ -223,12 +249,14 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000078
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000104
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 14 {
@@ -239,6 +267,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000073
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -250,10 +279,11 @@
 ; BOLT-NEXT:       Hash: 0x59796A
 ; BOLT-NEXT:       String: {{.+}} "t1"
 ; BOLT-NEXT:       Entry @ {{.+}} {
-; BOLT-NEXT:         Abbrev: [[ABBREV1]]
+; BOLT-NEXT:         Abbrev: [[ABBREV6]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000062
+; BOLT-NEXT:         DW_IDX_parent: Entry @ [[ENTRY]]
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-df-debug-names-generate-debug-names.test b/bolt/test/X86/dwarf5-df-debug-names-generate-debug-names.test
index 3be327b..b0b6707 100644
--- a/bolt/test/X86/dwarf5-df-debug-names-generate-debug-names.test
+++ b/bolt/test/X86/dwarf5-df-debug-names-generate-debug-names.test
@@ -17,7 +17,7 @@
 ; BOLT: [[OFFSET2:0x[0-9a-f]*]]: Compile Unit
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0x148
+; BOLT-NEXT:     Length: 0x14E
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 2
@@ -25,7 +25,7 @@
 ; BOLT-NEXT:     Foreign TU count: 0
 ; BOLT-NEXT:     Bucket count: 11
 ; BOLT-NEXT:     Name count: 11
-; BOLT-NEXT:     Abbreviations table size: 0x19
+; BOLT-NEXT:     Abbreviations table size: 0x1F
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -37,16 +37,19 @@
 ; BOLT-NEXT:       Tag: DW_TAG_variable
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
@@ -58,6 +61,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000029
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 2 {
@@ -68,6 +72,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000008c
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -80,6 +85,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000029
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 4 {
@@ -90,6 +96,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -102,6 +109,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000034
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -114,6 +122,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000034
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -129,6 +138,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000034
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -141,12 +151,14 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000025
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000025
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -159,6 +171,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000034
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 10 {
@@ -169,6 +182,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000057
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -187,6 +201,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-df-debug-names.test b/bolt/test/X86/dwarf5-df-debug-names.test
index 0352d0f..d53f2bd 100644
--- a/bolt/test/X86/dwarf5-df-debug-names.test
+++ b/bolt/test/X86/dwarf5-df-debug-names.test
@@ -14,136 +14,148 @@
 
 ; BOLT: [[OFFSET:0x[0-9a-f]*]]: Compile Unit
 ; BOLT: [[OFFSET1:0x[0-9a-f]*]]: Compile Unit
-: BOLT:       Name Index @ 0x0 {
-: BOLT-NEXT:   Header {
-: BOLT-NEXT:     Length: 0xF4
-: BOLT-NEXT:     Format: DWARF32
-: BOLT-NEXT:     Version: 5
-: BOLT-NEXT:     CU count: 2
-: BOLT-NEXT:     Local TU count: 0
-: BOLT-NEXT:     Foreign TU count: 0
-: BOLT-NEXT:     Bucket count: 7
-: BOLT-NEXT:     Name count: 7
-: BOLT-NEXT:     Abbreviations table size: 0x21
-: BOLT-NEXT:     Augmentation: 'BOLT'
-: BOLT-NEXT:   }
-: BOLT-NEXT:   Compilation Unit offsets [
-: BOLT-NEXT:     CU[0]: [[OFFSET]]
-: BOLT-NEXT:     CU[1]: [[OFFSET1]]
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Abbreviations [
-: BOLT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
-: BOLT-NEXT:       Tag: DW_TAG_structure_type
-: BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
-: BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-: BOLT-NEXT:     }
-: BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
-: BOLT-NEXT:       Tag: DW_TAG_base_type
-: BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
-: BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-: BOLT-NEXT:     }
-: BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
-: BOLT-NEXT:       Tag: DW_TAG_variable
-: BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
-: BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-: BOLT-NEXT:     }
-: BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
-: BOLT-NEXT:       Tag: DW_TAG_subprogram
-: BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
-: BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-: BOLT-NEXT:     }
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 0 [
-: BOLT-NEXT:     EMPTY
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 1 [
-: BOLT-NEXT:     Name 1 {
-: BOLT-NEXT:       Hash: 0x7C96E4DB
-: BOLT-NEXT:       String: {{.+}} "Foo2"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV1]]
-: BOLT-NEXT:         Tag: DW_TAG_structure_type
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x00
-: BOLT-NEXT:         DW_IDX_die_offset: 0x00000068
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 2 [
-: BOLT-NEXT:     Name 2 {
-: BOLT-NEXT:       Hash: 0xBA564846
-: BOLT-NEXT:       String: {{.+}} "Foo2Int"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV1]]
-: BOLT-NEXT:         Tag: DW_TAG_structure_type
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x01
-: BOLT-NEXT:         DW_IDX_die_offset: 0x00000025
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 3 [
-: BOLT-NEXT:     Name 3 {
-: BOLT-NEXT:       Hash: 0xB888030
-: BOLT-NEXT:       String: {{.+}} "int"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV2]]
-: BOLT-NEXT:         Tag: DW_TAG_base_type
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x01
-: BOLT-NEXT:         DW_IDX_die_offset: 0x00000043
-: BOLT-NEXT:       }
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV2]]
-: BOLT-NEXT:         Tag: DW_TAG_base_type
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x00
-: BOLT-NEXT:         DW_IDX_die_offset: 0x00000056
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:     Name 4 {
-: BOLT-NEXT:       Hash: 0xF73809C
-: BOLT-NEXT:       String: {{.+}} "Foo2a"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV1]]
-: BOLT-NEXT:         Tag: DW_TAG_structure_type
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x00
-: BOLT-NEXT:         DW_IDX_die_offset: 0x00000078
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:     Name 5 {
-: BOLT-NEXT:       Hash: 0x7C96CB76
-: BOLT-NEXT:       String: {{.+}} "fint"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV3]]
-: BOLT-NEXT:         Tag: DW_TAG_variable
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x01
-: BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:     Name 6 {
-: BOLT-NEXT:       Hash: 0x7C9A7F6A
-: BOLT-NEXT:       String: {{.+}} "main"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV4]]
-: BOLT-NEXT:         Tag: DW_TAG_subprogram
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x00
-: BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 4 [
-: BOLT-NEXT:     EMPTY
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 5 [
-: BOLT-NEXT:     Name 7 {
-: BOLT-NEXT:       Hash: 0x7C952063
-: BOLT-NEXT:       String: {{.+}} "char"
-: BOLT-NEXT:       Entry @ {{.+}} {
-: BOLT-NEXT:         Abbrev: [[ABBREV2]]
-: BOLT-NEXT:         Tag: DW_TAG_base_type
-: BOLT-NEXT:         DW_IDX_compile_unit: 0x00
-: BOLT-NEXT:         DW_IDX_die_offset: 0x00000064
-: BOLT-NEXT:       }
-: BOLT-NEXT:     }
-: BOLT-NEXT:   ]
-: BOLT-NEXT:   Bucket 6 [
-: BOLT-NEXT:     EMPTY
-: BOLT-NEXT:   ]
-: BOLT-NEXT: }
+; BOLT:       Name Index @ 0x0 {
+; BOLT-NEXT:   Header {
+; BOLT-NEXT:     Length: 0xFC
+; BOLT-NEXT:     Format: DWARF32
+; BOLT-NEXT:     Version: 5
+; BOLT-NEXT:     CU count: 2
+; BOLT-NEXT:     Local TU count: 0
+; BOLT-NEXT:     Foreign TU count: 0
+; BOLT-NEXT:     Bucket count: 7
+; BOLT-NEXT:     Name count: 7
+; BOLT-NEXT:     Abbreviations table size: 0x29
+; BOLT-NEXT:     Augmentation: 'BOLT'
+; BOLT-NEXT:   }
+; BOLT-NEXT:   Compilation Unit offsets [
+; BOLT-NEXT:     CU[0]: [[OFFSET]]
+; BOLT-NEXT:     CU[1]: [[OFFSET1]]
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Abbreviations [
+; BOLT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
+; BOLT-NEXT:       Tag: DW_TAG_structure_type
+; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
+; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
+; BOLT-NEXT:       Tag: DW_TAG_base_type
+; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
+; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
+; BOLT-NEXT:       Tag: DW_TAG_variable
+; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
+; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; BOLT-NEXT:       Tag: DW_TAG_subprogram
+; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
+; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
+; BOLT-NEXT:     }
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 0 [
+; BOLT-NEXT:     EMPTY
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 1 [
+; BOLT-NEXT:     Name 1 {
+; BOLT-NEXT:       Hash: 0x7C96E4DB
+; BOLT-NEXT:       String: {{.+}} "Foo2"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV1]]
+; BOLT-NEXT:         Tag: DW_TAG_structure_type
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
+; BOLT-NEXT:         DW_IDX_die_offset: 0x00000068
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 2 [
+; BOLT-NEXT:     Name 2 {
+; BOLT-NEXT:       Hash: 0xBA564846
+; BOLT-NEXT:       String: {{.+}} "Foo2Int"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV1]]
+; BOLT-NEXT:         Tag: DW_TAG_structure_type
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
+; BOLT-NEXT:         DW_IDX_die_offset: 0x00000025
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 3 [
+; BOLT-NEXT:     Name 3 {
+; BOLT-NEXT:       Hash: 0xB888030
+; BOLT-NEXT:       String: {{.+}} "int"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV2]]
+; BOLT-NEXT:         Tag: DW_TAG_base_type
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
+; BOLT-NEXT:         DW_IDX_die_offset: 0x00000043
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV2]]
+; BOLT-NEXT:         Tag: DW_TAG_base_type
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
+; BOLT-NEXT:         DW_IDX_die_offset: 0x00000056
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Name 4 {
+; BOLT-NEXT:       Hash: 0xF73809C
+; BOLT-NEXT:       String: {{.+}} "Foo2a"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV1]]
+; BOLT-NEXT:         Tag: DW_TAG_structure_type
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
+; BOLT-NEXT:         DW_IDX_die_offset: 0x00000078
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Name 5 {
+; BOLT-NEXT:       Hash: 0x7C96CB76
+; BOLT-NEXT:       String: {{.+}} "fint"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV3]]
+; BOLT-NEXT:         Tag: DW_TAG_variable
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
+; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Name 6 {
+; BOLT-NEXT:       Hash: 0x7C9A7F6A
+; BOLT-NEXT:       String: {{.+}} "main"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV4]]
+; BOLT-NEXT:         Tag: DW_TAG_subprogram
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
+; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 4 [
+; BOLT-NEXT:     EMPTY
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 5 [
+; BOLT-NEXT:     Name 7 {
+; BOLT-NEXT:       Hash: 0x7C952063
+; BOLT-NEXT:       String: {{.+}} "char"
+; BOLT-NEXT:       Entry @ {{.+}} {
+; BOLT-NEXT:         Abbrev: [[ABBREV2]]
+; BOLT-NEXT:         Tag: DW_TAG_base_type
+; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
+; BOLT-NEXT:         DW_IDX_die_offset: 0x00000064
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
+; BOLT-NEXT:       }
+; BOLT-NEXT:     }
+; BOLT-NEXT:   ]
+; BOLT-NEXT:   Bucket 6 [
+; BOLT-NEXT:     EMPTY
+; BOLT-NEXT:   ]
+; BOLT-NEXT: }
diff --git a/bolt/test/X86/dwarf5-df-main-debug-names-ftu-ltu-mix.test b/bolt/test/X86/dwarf5-df-main-debug-names-ftu-ltu-mix.test
index 8a8a4b1..3a0260f 100644
--- a/bolt/test/X86/dwarf5-df-main-debug-names-ftu-ltu-mix.test
+++ b/bolt/test/X86/dwarf5-df-main-debug-names-ftu-ltu-mix.test
@@ -31,6 +31,7 @@
 ; BOLT-NEXT:       Tag: DW_TAG_variable
 ; BOLT-NEXT:       DW_IDX_compile_unit: 0x02
 ; BOLT-NEXT:       DW_IDX_die_offset: 0x0000001e
+; BOLT-NEXT:       DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   }
 ; BOLT:        Name 6 {
@@ -42,6 +43,7 @@
 ; BOLT-NEXT:       DW_IDX_type_unit: 0x02
 ; BOLT-NEXT:       DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:       DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:       DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   }
 ; BOLT:        Name 7 {
@@ -52,5 +54,6 @@
 ; BOLT-NEXT:       Tag: DW_TAG_structure_type
 ; BOLT-NEXT:       DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:       DW_IDX_die_offset: 0x00000023
+; BOLT-NEXT:       DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   }
diff --git a/bolt/test/X86/dwarf5-df-one-cu-debug-names.test b/bolt/test/X86/dwarf5-df-one-cu-debug-names.test
index 246ce7e..bf1cad8 100644
--- a/bolt/test/X86/dwarf5-df-one-cu-debug-names.test
+++ b/bolt/test/X86/dwarf5-df-one-cu-debug-names.test
@@ -13,7 +13,7 @@
 ; BOLT: [[OFFSET:0x[0-9a-f]*]]: Compile Unit
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0xA9
+; BOLT-NEXT:     Length: 0xAF
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 1
@@ -21,7 +21,7 @@
 ; BOLT-NEXT:     Foreign TU count: 0
 ; BOLT-NEXT:     Bucket count: 5
 ; BOLT-NEXT:     Name count: 5
-; BOLT-NEXT:     Abbreviations table size: 0x13
+; BOLT-NEXT:     Abbreviations table size: 0x19
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -31,14 +31,17 @@
 ; BOLT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_structure_type
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
@@ -52,6 +55,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000068
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 2 {
@@ -61,6 +65,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -75,6 +80,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000056
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -86,6 +92,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000078
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 5 {
@@ -95,6 +102,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000064
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-df-types-debug-names.test b/bolt/test/X86/dwarf5-df-types-debug-names.test
index fdb962b..f5a2c9c 100644
--- a/bolt/test/X86/dwarf5-df-types-debug-names.test
+++ b/bolt/test/X86/dwarf5-df-types-debug-names.test
@@ -25,7 +25,7 @@
 
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0x174
+; BOLT-NEXT:     Length: 0x17E
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 2
@@ -33,7 +33,7 @@
 ; BOLT-NEXT:     Foreign TU count: 4
 ; BOLT-NEXT:     Bucket count: 9
 ; BOLT-NEXT:     Name count: 9
-; BOLT-NEXT:     Abbreviations table size: 0x2D
+; BOLT-NEXT:     Abbreviations table size: 0x37
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -52,27 +52,32 @@
 ; BOLT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_variable
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:         DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
@@ -88,6 +93,7 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 2 {
@@ -98,6 +104,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000029
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 3 {
@@ -108,6 +115,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -120,6 +128,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000025
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: 0x5
@@ -127,12 +136,14 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x02
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000003f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000056
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -145,6 +156,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000029
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 6 {
@@ -156,6 +168,7 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV]]
@@ -163,6 +176,7 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x03
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 7 {
@@ -174,6 +188,7 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x02
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -195,6 +210,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -208,6 +224,7 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000036
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: 0x5
@@ -215,6 +232,7 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000048
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: 0x5
@@ -222,12 +240,14 @@
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x03
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000048
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_compile_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000064
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-df-types-one-cu-debug-names.test b/bolt/test/X86/dwarf5-df-types-one-cu-debug-names.test
index 1c77583..2a489d8 100644
--- a/bolt/test/X86/dwarf5-df-types-one-cu-debug-names.test
+++ b/bolt/test/X86/dwarf5-df-types-one-cu-debug-names.test
@@ -17,7 +17,7 @@
 ; BOLT: [[OFFSET:0x[0-9a-f]*]]: Compile Unit
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0xD1
+; BOLT-NEXT:     Length: 0xD9
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 1
@@ -25,7 +25,7 @@
 ; BOLT-NEXT:     Foreign TU count: 2
 ; BOLT-NEXT:     Bucket count: 5
 ; BOLT-NEXT:     Name count: 5
-; BOLT-NEXT:     Abbreviations table size: 0x1D
+; BOLT-NEXT:     Abbreviations table size: 0x25
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -40,19 +40,23 @@
 ; BOLT-NEXT:       Tag: DW_TAG_structure_type
 ; BOLT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
@@ -67,6 +71,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 2 {
@@ -76,6 +81,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000001a
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -90,6 +96,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000056
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -102,6 +109,7 @@
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000021
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 5 {
@@ -112,17 +120,20 @@
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000036
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV4]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_type_unit: 0x01
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000048
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
 ; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000064
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-one-cu-debug-names.test b/bolt/test/X86/dwarf5-one-cu-debug-names.test
index e775452..2145d81 100644
--- a/bolt/test/X86/dwarf5-one-cu-debug-names.test
+++ b/bolt/test/X86/dwarf5-one-cu-debug-names.test
@@ -9,7 +9,7 @@
 ; BOLT: [[OFFSET1:0x[0-9a-f]*]]: Compile Unit
 ; BOLT:       Name Index @ 0x0 {
 ; BOLT-NEXT:   Header {
-; BOLT-NEXT:     Length: 0x13E
+; BOLT-NEXT:     Length: 0x154
 ; BOLT-NEXT:     Format: DWARF32
 ; BOLT-NEXT:     Version: 5
 ; BOLT-NEXT:     CU count: 1
@@ -17,7 +17,7 @@
 ; BOLT-NEXT:     Foreign TU count: 0
 ; BOLT-NEXT:     Bucket count: 11
 ; BOLT-NEXT:     Name count: 11
-; BOLT-NEXT:     Abbreviations table size: 0x1F
+; BOLT-NEXT:     Abbreviations table size: 0x31
 ; BOLT-NEXT:     Augmentation: 'BOLT'
 ; BOLT-NEXT:   }
 ; BOLT-NEXT:   Compilation Unit offsets [
@@ -27,22 +27,32 @@
 ; BOLT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_structure_type
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_base_type
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
+; BOLT-NEXT:        Tag: DW_TAG_structure_type
+; BOLT-NEXT:        DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:        DW_IDX_parent: DW_FORM_ref4
+; BOLT-NEXT:     }
+; BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_variable
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
-; BOLT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; BOLT-NEXT:     Abbreviation [[ABBREV5:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_subprogram
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
-; BOLT-NEXT:     Abbreviation [[ABBREV5:0x[0-9a-f]*]] {
+; BOLT-NEXT:     Abbreviation [[ABBREV6:0x[0-9a-f]*]] {
 ; BOLT-NEXT:       Tag: DW_TAG_namespace
 ; BOLT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
 ; BOLT-NEXT:   Bucket 0 [
@@ -53,6 +63,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000104
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 2 {
@@ -62,6 +73,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x000000c5
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -73,6 +85,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x000000c9
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 4 {
@@ -82,6 +95,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000003f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -93,6 +107,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x000000eb
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -107,18 +122,20 @@
 ; BOLT-NEXT:       Hash: 0x59796A
 ; BOLT-NEXT:       String: {{.+}} "t1"
 ; BOLT-NEXT:       Entry @ {{.+}} {
-; BOLT-NEXT:         Abbrev: [[ABBREV1]]
+; BOLT-NEXT:         Abbrev: [[ABBREV3]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000062
+; BOLT-NEXT:         DW_IDX_parent: Entry @ 0x14d
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 7 {
 ; BOLT-NEXT:       Hash: 0x5979AC
 ; BOLT-NEXT:       String: {{.+}} "v1"
 ; BOLT-NEXT:       Entry @ {{.+}} {
-; BOLT-NEXT:         Abbrev: [[ABBREV3]]
+; BOLT-NEXT:         Abbrev: [[ABBREV4]]
 ; BOLT-NEXT:         Tag: DW_TAG_variable
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000024
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -130,6 +147,7 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:         Tag: DW_TAG_base_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000005d
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -141,15 +159,17 @@
 ; BOLT-NEXT:         Abbrev: [[ABBREV1]]
 ; BOLT-NEXT:         Tag: DW_TAG_structure_type
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x0000002f
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:     Name 10 {
 ; BOLT-NEXT:       Hash: 0x7C9A7F6A
 ; BOLT-NEXT:       String: {{.+}} "main"
 ; BOLT-NEXT:       Entry @ {{.+}} {
-; BOLT-NEXT:         Abbrev: [[ABBREV4]]
+; BOLT-NEXT:         Abbrev: [[ABBREV5]]
 ; BOLT-NEXT:         Tag: DW_TAG_subprogram
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000073
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
@@ -157,15 +177,17 @@
 ; BOLT-NEXT:     Name 11 {
 ; BOLT-NEXT:       Hash: 0x8CFC710C
 ; BOLT-NEXT:       String: {{.+}} "(anonymous namespace)"
-; BOLT-NEXT:       Entry @ {{.+}} {
-; BOLT-NEXT:         Abbrev: [[ABBREV5]]
+; BOLT-NEXT:       Entry @ 0x14d {
+; BOLT-NEXT:         Abbrev: [[ABBREV6]]
 ; BOLT-NEXT:         Tag: DW_TAG_namespace
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000061
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:       Entry @ {{.+}} {
-; BOLT-NEXT:         Abbrev: [[ABBREV5]]
+; BOLT-NEXT:         Abbrev: [[ABBREV6]]
 ; BOLT-NEXT:         Tag: DW_TAG_namespace
 ; BOLT-NEXT:         DW_IDX_die_offset: 0x00000061
+; BOLT-NEXT:         DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:       }
 ; BOLT-NEXT:     }
 ; BOLT-NEXT:   ]
diff --git a/bolt/test/X86/dwarf5-types-debug-names.test b/bolt/test/X86/dwarf5-types-debug-names.test
index 6a26477..9462429 100644
--- a/bolt/test/X86/dwarf5-types-debug-names.test
+++ b/bolt/test/X86/dwarf5-types-debug-names.test
@@ -14,7 +14,7 @@
 
 ; BOLT: Name Index @ 0x0 {
 ; BOLT:   Header {
-; BOLT:     Length: 0xE1
+; BOLT:     Length: 0xE9
 ; BOLT:     Format: DWARF32
 ; BOLT:     Version: 5
 ; BOLT:     CU count: 2
@@ -22,7 +22,7 @@
 ; BOLT:     Foreign TU count: 0
 ; BOLT:     Bucket count: 6
 ; BOLT:     Name count: 6
-; BOLT:     Abbreviations table size: 0x21
+; BOLT:     Abbreviations table size: 0x29
 ; BOLT:     Augmentation: 'BOLT'
 ; BOLT:   }
 ; BOLT:   Compilation Unit offsets [
@@ -37,21 +37,25 @@
 ; BOLT:       Tag: DW_TAG_structure_type
 ; BOLT:       DW_IDX_type_unit: DW_FORM_data1
 ; BOLT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT:     }
 ; BOLT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
 ; BOLT:       Tag: DW_TAG_subprogram
 ; BOLT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT:     }
 ; BOLT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT:       Tag: DW_TAG_base_type
 ; BOLT:       DW_IDX_compile_unit: DW_FORM_data1
 ; BOLT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT:     }
 ; BOLT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT:       Tag: DW_TAG_base_type
 ; BOLT:       DW_IDX_type_unit: DW_FORM_data1
 ; BOLT:       DW_IDX_die_offset: DW_FORM_ref4
+; BOLT:       DW_IDX_parent: DW_FORM_flag_present
 ; BOLT:     }
 ; BOLT:   ]
 ; BOLT:   Bucket 0 [
@@ -63,6 +67,7 @@
 ; BOLT:         Tag: DW_TAG_structure_type
 ; BOLT:         DW_IDX_type_unit: 0x00
 ; BOLT:         DW_IDX_die_offset: 0x00000023
+; BOLT:         DW_IDX_parent: <parent not indexed>
 ; BOLT:       }
 ; BOLT:     }
 ; BOLT:   ]
@@ -75,6 +80,7 @@
 ; BOLT:         Tag: DW_TAG_subprogram
 ; BOLT:         DW_IDX_compile_unit: 0x01
 ; BOLT:         DW_IDX_die_offset: 0x00000024
+; BOLT:         DW_IDX_parent: <parent not indexed>
 ; BOLT:       }
 ; BOLT:     }
 ; BOLT:   ]
@@ -87,6 +93,7 @@
 ; BOLT:         Tag: DW_TAG_base_type
 ; BOLT:         DW_IDX_compile_unit: 0x01
 ; BOLT:         DW_IDX_die_offset: 0x00000040
+; BOLT:         DW_IDX_parent: <parent not indexed>
 ; BOLT:       }
 ; BOLT:     }
 ; BOLT:   ]
@@ -99,6 +106,7 @@
 ; BOLT:         Tag: DW_TAG_subprogram
 ; BOLT:         DW_IDX_compile_unit: 0x01
 ; BOLT:         DW_IDX_die_offset: 0x00000024
+; BOLT:         DW_IDX_parent: <parent not indexed>
 ; BOLT:       }
 ; BOLT:     }
 ; BOLT:   ]
@@ -111,6 +119,7 @@
 ; BOLT:         Tag: DW_TAG_subprogram
 ; BOLT:         DW_IDX_compile_unit: 0x00
 ; BOLT:         DW_IDX_die_offset: 0x00000024
+; BOLT:         DW_IDX_parent: <parent not indexed>
 ; BOLT:       }
 ; BOLT:     }
 ; BOLT:   ]
@@ -123,6 +132,7 @@
 ; BOLT:         Tag: DW_TAG_base_type
 ; BOLT:         DW_IDX_type_unit: 0x00
 ; BOLT:         DW_IDX_die_offset: 0x00000038
+; BOLT:         DW_IDX_parent: <parent not indexed>
 ; BOLT:       }
 ; BOLT:     }
 ; BOLT:   ]
diff --git a/bolt/test/X86/dwarf5-types-one-cu-debug-names.test b/bolt/test/X86/dwarf5-types-one-cu-debug-names.test
index 00a1319..02c0ef8 100644
--- a/bolt/test/X86/dwarf5-types-one-cu-debug-names.test
+++ b/bolt/test/X86/dwarf5-types-one-cu-debug-names.test
@@ -11,7 +11,7 @@
 
 ; BOLT:Name Index @ 0x0 {
 ; BOLT-NEXT:  Header {
-; BOLT-NEXT:    Length: 0xA3
+; BOLT-NEXT:    Length: 0xAB
 ; BOLT-NEXT:    Format: DWARF32
 ; BOLT-NEXT:    Version: 5
 ; BOLT-NEXT:    CU count: 1
@@ -19,7 +19,7 @@
 ; BOLT-NEXT:    Foreign TU count: 0
 ; BOLT-NEXT:    Bucket count: 4
 ; BOLT-NEXT:    Name count: 4
-; BOLT-NEXT:    Abbreviations table size: 0x1D
+; BOLT-NEXT:    Abbreviations table size: 0x25
 ; BOLT-NEXT:    Augmentation: 'BOLT'
 ; BOLT-NEXT:  }
 ; BOLT-NEXT:  Compilation Unit offsets [
@@ -32,20 +32,24 @@
 ; BOLT-NEXT:    Abbreviation [[ABBREV:0x[0-9a-f]*]] {
 ; BOLT-NEXT:      Tag: DW_TAG_base_type
 ; BOLT-NEXT:      DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:      DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:    Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
 ; BOLT-NEXT:      Tag: DW_TAG_structure_type
 ; BOLT-NEXT:      DW_IDX_type_unit: DW_FORM_data1
 ; BOLT-NEXT:      DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:      DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:    Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; BOLT-NEXT:      Tag: DW_TAG_subprogram
 ; BOLT-NEXT:      DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:      DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:    Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; BOLT-NEXT:      Tag: DW_TAG_base_type
 ; BOLT-NEXT:      DW_IDX_type_unit: DW_FORM_data1
 ; BOLT-NEXT:      DW_IDX_die_offset: DW_FORM_ref4
+; BOLT-NEXT:      DW_IDX_parent: DW_FORM_flag_present
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:  ]
 ; BOLT-NEXT:  Bucket 0 [
@@ -56,6 +60,7 @@
 ; BOLT-NEXT:        Abbrev: [[ABBREV]]
 ; BOLT-NEXT:        Tag: DW_TAG_base_type
 ; BOLT-NEXT:        DW_IDX_die_offset: 0x0000003f
+; BOLT-NEXT:        DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:      }
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:    Name 2 {
@@ -66,6 +71,7 @@
 ; BOLT-NEXT:        Tag: DW_TAG_structure_type
 ; BOLT-NEXT:        DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:        DW_IDX_die_offset: 0x00000023
+; BOLT-NEXT:        DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:      }
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:  ]
@@ -80,6 +86,7 @@
 ; BOLT-NEXT:        Abbrev: [[ABBREV2]]
 ; BOLT-NEXT:        Tag: DW_TAG_subprogram
 ; BOLT-NEXT:        DW_IDX_die_offset: 0x00000024
+; BOLT-NEXT:        DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:      }
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:  ]
@@ -92,6 +99,7 @@
 ; BOLT-NEXT:        Tag: DW_TAG_base_type
 ; BOLT-NEXT:        DW_IDX_type_unit: 0x00
 ; BOLT-NEXT:        DW_IDX_die_offset: 0x00000038
+; BOLT-NEXT:        DW_IDX_parent: <parent not indexed>
 ; BOLT-NEXT:      }
 ; BOLT-NEXT:    }
 ; BOLT-NEXT:  ]
diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test
index 533ed91..f4a8865 100644
--- a/bolt/test/X86/reader-stale-yaml.test
+++ b/bolt/test/X86/reader-stale-yaml.test
@@ -14,6 +14,10 @@ RUN:   --profile-ignore-hash=1 --profile-use-dfs=0 --debug-only=bolt-prof 2>&1 |
 RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale.yaml \
 RUN:   --print-cfg --print-only=SolveCubic --infer-stale-profile=1 \
 RUN:   --profile-ignore-hash=1 --profile-use-dfs=0 --debug-only=bolt-prof 2>&1 | FileCheck %s -check-prefix=CHECK2
+# Testing skipped function
+RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale.yaml \
+RUN:   --print-cfg --print-only=usqrt --infer-stale-profile=1 --skip-funcs=usqrt \
+RUN:   --profile-ignore-hash=1 --profile-use-dfs=0
 
 CHECK0: BOLT-INFO: 2 out of 7 functions in the binary (28.6%) have non-empty execution profile
 CHECK0: BOLT-WARNING: 2 (100.0% of all profiled) functions have invalid (possibly stale) profile
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 6344803..daf9f39 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -134,9 +134,8 @@ TEST_P(MCPlusBuilderTester, ReplaceRegWithImm) {
 
 TEST_P(MCPlusBuilderTester, Annotation) {
   MCInst Inst;
-  bool Success = BC->MIB->createTailCall(Inst, BC->Ctx->createNamedTempSymbol(),
-                                         BC->Ctx.get());
-  ASSERT_TRUE(Success);
+  BC->MIB->createTailCall(Inst, BC->Ctx->createNamedTempSymbol(),
+                          BC->Ctx.get());
   MCSymbol *LPSymbol = BC->Ctx->createNamedTempSymbol("LP");
   uint64_t Value = INT32_MIN;
   // Test encodeAnnotationImm using this indirect way
@@ -151,9 +150,8 @@ TEST_P(MCPlusBuilderTester, Annotation) {
   // Large int64 should trigger an out of range assertion
   Value = 0x1FF'FFFF'FFFF'FFFFULL;
   Inst.clear();
-  Success = BC->MIB->createTailCall(Inst, BC->Ctx->createNamedTempSymbol(),
-                                    BC->Ctx.get());
-  ASSERT_TRUE(Success);
+  BC->MIB->createTailCall(Inst, BC->Ctx->createNamedTempSymbol(),
+                          BC->Ctx.get());
   ASSERT_DEATH(BC->MIB->addEHInfo(Inst, MCPlus::MCLandingPad(LPSymbol, Value)),
                "annotation value out of range");
 }
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 06af93f..13d7261 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -13,6 +13,7 @@ Clang Language Extensions
    BlockLanguageSpec
    Block-ABI-Apple
    AutomaticReferenceCounting
+   PointerAuthentication
    MatrixTypes
 
 Introduction
@@ -3443,6 +3444,21 @@ Query for this feature with ``__has_builtin(__builtin_debugtrap)``.
 
 Query for this feature with ``__has_builtin(__builtin_trap)``.
 
+``__builtin_arm_trap``
+----------------------
+
+``__builtin_arm_trap`` is an AArch64 extension to ``__builtin_trap`` which also accepts a compile-time constant value, encoded directly into the trap instruction for later inspection.
+
+**Syntax**:
+
+.. code-block:: c++
+
+    __builtin_arm_trap(const unsigned short payload)
+
+**Description**
+
+``__builtin_arm_trap`` is lowered to the ``llvm.aarch64.break`` builtin, and then to ``brk #payload``.
+
 ``__builtin_nondeterministic_value``
 ------------------------------------
 
@@ -4303,6 +4319,10 @@ reordering of memory accesses and side effect instructions. Other instructions
 like simple arithmetic may be reordered around the intrinsic. If you expect to
 have no reordering at all, use inline assembly instead.
 
+Pointer Authentication
+^^^^^^^^^^^^^^^^^^^^^^
+See :doc:`PointerAuthentication`.
+
 X86/X86-64 Language Extensions
 ------------------------------
 
diff --git a/clang/docs/PointerAuthentication.rst b/clang/docs/PointerAuthentication.rst
new file mode 100644
index 0000000..19b3384
--- /dev/null
+++ b/clang/docs/PointerAuthentication.rst
@@ -0,0 +1,485 @@
+Pointer Authentication
+======================
+
+.. contents::
+   :local:
+
+Introduction
+------------
+
+Pointer authentication is a technology which offers strong probabilistic
+protection against exploiting a broad class of memory bugs to take control of
+program execution.  When adopted consistently in a language ABI, it provides
+a form of relatively fine-grained control flow integrity (CFI) check that
+resists both return-oriented programming (ROP) and jump-oriented programming
+(JOP) attacks.
+
+While pointer authentication can be implemented purely in software, direct
+hardware support (e.g. as provided by Armv8.3 PAuth) can dramatically improve
+performance and code size.  Similarly, while pointer authentication
+can be implemented on any architecture, taking advantage of the (typically)
+excess addressing range of a target with 64-bit pointers minimizes the impact
+on memory performance and can allow interoperation with existing code (by
+disabling pointer authentication dynamically).  This document will generally
+attempt to present the pointer authentication feature independent of any
+hardware implementation or ABI.  Considerations that are
+implementation-specific are clearly identified throughout.
+
+Note that there are several different terms in use:
+
+- **Pointer authentication** is a target-independent language technology.
+
+- **PAuth** (sometimes referred to as **PAC**, for Pointer Authentication
+  Codes) is an AArch64 architecture extension that provides hardware support
+  for pointer authentication.  Additional extensions either modify some of the
+  PAuth instruction behavior (notably FPAC), or provide new instruction
+  variants (PAuth_LR).
+
+- **Armv8.3** is an AArch64 architecture revision that makes PAuth mandatory.
+
+- **arm64e** is a specific ABI (not yet fully stable) for implementing pointer
+  authentication using PAuth on certain Apple operating systems.
+
+This document serves four purposes:
+
+- It describes the basic ideas of pointer authentication.
+
+- It documents several language extensions that are useful on targets using
+  pointer authentication.
+
+- It will eventually present a theory of operation for the security mitigation,
+  describing the basic requirements for correctness, various weaknesses in the
+  mechanism, and ways in which programmers can strengthen its protections
+  (including recommendations for language implementors).
+
+- It will eventually document the language ABIs currently used for C, C++,
+  Objective-C, and Swift on arm64e, although these are not yet stable on any
+  target.
+
+Basic Concepts
+--------------
+
+The simple address of an object or function is a **raw pointer**.  A raw
+pointer can be **signed** to produce a **signed pointer**.  A signed pointer
+can be then **authenticated** in order to verify that it was **validly signed**
+and extract the original raw pointer.  These terms reflect the most likely
+implementation technique: computing and storing a cryptographic signature along
+with the pointer.
+
+An **abstract signing key** is a name which refers to a secret key which is
+used to sign and authenticate pointers.  The concrete key value for a
+particular name is consistent throughout a process.
+
+A **discriminator** is an arbitrary value used to **diversify** signed pointers
+so that one validly-signed pointer cannot simply be copied over another.
+A discriminator is simply opaque data of some implementation-defined size that
+is included in the signature as a salt (see `Discriminators`_ for details.)
+
+Nearly all aspects of pointer authentication use just these two primary
+operations:
+
+- ``sign(raw_pointer, key, discriminator)`` produces a signed pointer given
+  a raw pointer, an abstract signing key, and a discriminator.
+
+- ``auth(signed_pointer, key, discriminator)`` produces a raw pointer given
+  a signed pointer, an abstract signing key, and a discriminator.
+
+``auth(sign(raw_pointer, key, discriminator), key, discriminator)`` must
+succeed and produce ``raw_pointer``.  ``auth`` applied to a value that was
+ultimately produced in any other way is expected to fail, which halts the
+program either:
+
+- immediately, on implementations that enforce ``auth`` success (e.g., when
+  using compiler-generated ``auth`` failure checks, or Armv8.3 with the FPAC
+  extension), or
+
+- when the resulting pointer value is used, on implementations that don't.
+
+However, regardless of the implementation's handling of ``auth`` failures, it
+is permitted for ``auth`` to fail to detect that a signed pointer was not
+produced in this way, in which case it may return anything; this is what makes
+pointer authentication a probabilistic mitigation rather than a perfect one.
+
+There are two secondary operations which are required only to implement certain
+intrinsics in ``<ptrauth.h>``:
+
+- ``strip(signed_pointer, key)`` produces a raw pointer given a signed pointer
+  and a key without verifying its validity, unlike ``auth``.  This is useful
+  for certain kinds of tooling, such as crash backtraces; it should generally
+  not be used in the basic language ABI except in very careful ways.
+
+- ``sign_generic(value)`` produces a cryptographic signature for arbitrary
+  data, not necessarily a pointer.  This is useful for efficiently verifying
+  that non-pointer data has not been tampered with.
+
+Whenever any of these operations is called for, the key value must be known
+statically.  This is because the layout of a signed pointer may vary according
+to the signing key.  (For example, in Armv8.3, the layout of a signed pointer
+depends on whether Top Byte Ignore (TBI) is enabled, which can be set
+independently for I and D keys.)
+
+.. admonition:: Note for API designers and language implementors
+
+  These are the *primitive* operations of pointer authentication, provided for
+  clarity of description.  They are not suitable either as high-level
+  interfaces or as primitives in a compiler IR because they expose raw
+  pointers.  Raw pointers require special attention in the language
+  implementation to avoid the accidental creation of exploitable code
+  sequences.
+
+The following details are all implementation-defined:
+
+- the nature of a signed pointer
+- the size of a discriminator
+- the number and nature of the signing keys
+- the implementation of the ``sign``, ``auth``, ``strip``, and ``sign_generic``
+  operations
+
+While the use of the terms "sign" and "signed pointer" suggest the use of
+a cryptographic signature, other implementations may be possible.  See
+`Alternative implementations`_ for an exploration of implementation options.
+
+.. admonition:: Implementation example: Armv8.3
+
+  Readers may find it helpful to know how these terms map to Armv8.3 PAuth:
+
+  - A signed pointer is a pointer with a signature stored in the
+    otherwise-unused high bits.  The kernel configures the address width based
+    on the system's addressing needs, and enables TBI for I or D keys as
+    needed.  The bits above the address bits and below the TBI bits (if
+    enabled) are unused.  The signature width then depends on this addressing
+    configuration.
+
+  - A discriminator is a 64-bit integer.  Constant discriminators are 16-bit
+    integers.  Blending a constant discriminator into an address consists of
+    replacing the top 16 bits of the pointer containing the address with the
+    constant.  Pointers used for blending purposes should only have address
+    bits, since higher bits will be at least partially overwritten with the
+    constant discriminator.
+
+  - There are five 128-bit signing-key registers, each of which can only be
+    directly read or set by privileged code.  Of these, four are used for
+    signing pointers, and the fifth is used only for ``sign_generic``.  The key
+    data is simply a pepper added to the hash, not an encryption key, and so
+    can be initialized using random data.
+
+  - ``sign`` computes a cryptographic hash of the pointer, discriminator, and
+    signing key, and stores it in the high bits as the signature. ``auth``
+    removes the signature, computes the same hash, and compares the result with
+    the stored signature.  ``strip`` removes the signature without
+    authenticating it.  While ``aut*`` instructions do not themselves trap on
+    failure in Armv8.3 PAuth, they do with the later optional FPAC extension.
+    An implementation can also choose to emulate this trapping behavior by
+    emitting additional instructions around ``aut*``.
+
+  - ``sign_generic`` corresponds to the ``pacga`` instruction, which takes two
+    64-bit values and produces a 64-bit cryptographic hash. Implementations of
+    this instruction are not required to produce meaningful data in all bits of
+    the result.
+
+Discriminators
+~~~~~~~~~~~~~~
+
+A discriminator is arbitrary extra data which alters the signature calculated
+for a pointer.  When two pointers are signed differently --- either with
+different keys or with different discriminators --- an attacker cannot simply
+replace one pointer with the other.
+
+To use standard cryptographic terminology, a discriminator acts as a
+`salt <https://en.wikipedia.org/wiki/Salt_(cryptography)>`_ in the signing of a
+pointer, and the key data acts as a
+`pepper <https://en.wikipedia.org/wiki/Pepper_(cryptography)>`_.  That is,
+both the discriminator and key data are ultimately just added as inputs to the
+signing algorithm along with the pointer, but they serve significantly
+different roles.  The key data is a common secret added to every signature,
+whereas the discriminator is a value that can be derived from
+the context in which a specific pointer is signed.  However, unlike a password
+salt, it's important that discriminators be *independently* derived from the
+circumstances of the signing; they should never simply be stored alongside
+a pointer.  Discriminators are then re-derived in authentication operations.
+
+The intrinsic interface in ``<ptrauth.h>`` allows an arbitrary discriminator
+value to be provided, but can only be used when running normal code.  The
+discriminators used by language ABIs must be restricted to make it feasible for
+the loader to sign pointers stored in global memory without needing excessive
+amounts of metadata.  Under these restrictions, a discriminator may consist of
+either or both of the following:
+
+- The address at which the pointer is stored in memory.  A pointer signed with
+  a discriminator which incorporates its storage address is said to have
+  **address diversity**.  In general, using address diversity means that
+  a pointer cannot be reliably copied by an attacker to or from a different
+  memory location.  However, an attacker may still be able to attack a larger
+  call sequence if they can alter the address through which the pointer is
+  accessed.  Furthermore, some situations cannot use address diversity because
+  of language or other restrictions.
+
+- A constant integer, called a **constant discriminator**. A pointer signed
+  with a non-zero constant discriminator is said to have **constant
+  diversity**.  If the discriminator is specific to a single declaration, it is
+  said to have **declaration diversity**; if the discriminator is specific to
+  a type of value, it is said to have **type diversity**.  For example, C++
+  v-tables on arm64e sign their component functions using a hash of their
+  method names and signatures, which provides declaration diversity; similarly,
+  C++ member function pointers sign their invocation functions using a hash of
+  the member pointer type, which provides type diversity.
+
+The implementation may need to restrict constant discriminators to be
+significantly smaller than the full size of a discriminator.  For example, on
+arm64e, constant discriminators are only 16-bit values.  This is believed to
+not significantly weaken the mitigation, since collisions remain uncommon.
+
+The algorithm for blending a constant discriminator with a storage address is
+implementation-defined.
+
+.. _Signing schemas:
+
+Signing Schemas
+~~~~~~~~~~~~~~~
+
+Correct use of pointer authentication requires the signing code and the
+authenticating code to agree about the **signing schema** for the pointer:
+
+- the abstract signing key with which the pointer should be signed and
+- an algorithm for computing the discriminator.
+
+As described in the section above on `Discriminators`_, in most situations, the
+discriminator is produced by taking a constant discriminator and optionally
+blending it with the storage address of the pointer.  In these situations, the
+signing schema breaks down even more simply:
+
+- the abstract signing key,
+- a constant discriminator, and
+- whether to use address diversity.
+
+It is important that the signing schema be independently derived at all signing
+and authentication sites.  Preferably, the schema should be hard-coded
+everywhere it is needed, but at the very least, it must not be derived by
+inspecting information stored along with the pointer.
+
+Language Features
+-----------------
+
+There is currently one main pointer authentication language feature:
+
+- The language provides the ``<ptrauth.h>`` intrinsic interface for manually
+  signing and authenticating pointers in code.  These can be used in
+  circumstances where very specific behavior is required.
+
+
+Language Extensions
+~~~~~~~~~~~~~~~~~~~
+
+Feature Testing
+^^^^^^^^^^^^^^^
+
+Whether the current target uses pointer authentication can be tested for with
+a number of different tests.
+
+- ``__has_feature(ptrauth_intrinsics)`` is true if ``<ptrauth.h>`` provides its
+  normal interface.  This may be true even on targets where pointer
+  authentication is not enabled by default.
+
+``<ptrauth.h>``
+~~~~~~~~~~~~~~~
+
+This header defines the following types and operations:
+
+``ptrauth_key``
+^^^^^^^^^^^^^^^
+
+This ``enum`` is the type of abstract signing keys.  In addition to defining
+the set of implementation-specific signing keys (for example, Armv8.3 defines
+``ptrauth_key_asia``), it also defines some portable aliases for those keys.
+For example, ``ptrauth_key_function_pointer`` is the key generally used for
+C function pointers, which will generally be suitable for other
+function-signing schemas.
+
+In all the operation descriptions below, key values must be constant values
+corresponding to one of the implementation-specific abstract signing keys from
+this ``enum``.
+
+``ptrauth_extra_data_t``
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is a ``typedef`` of a standard integer type of the correct size to hold
+a discriminator value.
+
+In the signing and authentication operation descriptions below, discriminator
+values must have either pointer type or integer type. If the discriminator is
+an integer, it will be coerced to ``ptrauth_extra_data_t``.
+
+``ptrauth_blend_discriminator``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_blend_discriminator(pointer, integer)
+
+Produce a discriminator value which blends information from the given pointer
+and the given integer.
+
+Implementations may ignore some bits from each value, which is to say, the
+blending algorithm may be chosen for speed and convenience over theoretical
+strength as a hash-combining algorithm.  For example, arm64e simply overwrites
+the high 16 bits of the pointer with the low 16 bits of the integer, which can
+be done in a single instruction with an immediate integer.
+
+``pointer`` must have pointer type, and ``integer`` must have integer type. The
+result has type ``ptrauth_extra_data_t``.
+
+``ptrauth_strip``
+^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_strip(signedPointer, key)
+
+Given that ``signedPointer`` matches the layout for signed pointers signed with
+the given key, extract the raw pointer from it.  This operation does not trap
+and cannot fail, even if the pointer is not validly signed.
+
+``ptrauth_sign_unauthenticated``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_sign_unauthenticated(pointer, key, discriminator)
+
+Produce a signed pointer for the given raw pointer without applying any
+authentication or extra treatment.  This operation is not required to have the
+same behavior on a null pointer that the language implementation would.
+
+This is a treacherous operation that can easily result in signing oracles.
+Programs should use it seldom and carefully.
+
+``ptrauth_auth_and_resign``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_auth_and_resign(pointer, oldKey, oldDiscriminator, newKey, newDiscriminator)
+
+Authenticate that ``pointer`` is signed with ``oldKey`` and
+``oldDiscriminator`` and then resign the raw-pointer result of that
+authentication with ``newKey`` and ``newDiscriminator``.
+
+``pointer`` must have pointer type.  The result will have the same type as
+``pointer``.  This operation is not required to have the same behavior on
+a null pointer that the language implementation would.
+
+The code sequence produced for this operation must not be directly attackable.
+However, if the discriminator values are not constant integers, their
+computations may still be attackable.  In the future, Clang should be enhanced
+to guaranteed non-attackability if these expressions are safely-derived.
+
+``ptrauth_auth_data``
+^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_auth_data(pointer, key, discriminator)
+
+Authenticate that ``pointer`` is signed with ``key`` and ``discriminator`` and
+remove the signature.
+
+``pointer`` must have object pointer type.  The result will have the same type
+as ``pointer``.  This operation is not required to have the same behavior on
+a null pointer that the language implementation would.
+
+In the future when Clang makes safe derivation guarantees, the result of
+this operation should be considered safely-derived.
+
+``ptrauth_sign_generic_data``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_sign_generic_data(value1, value2)
+
+Computes a signature for the given pair of values, incorporating a secret
+signing key.
+
+This operation can be used to verify that arbitrary data has not been tampered
+with by computing a signature for the data, storing that signature, and then
+repeating this process and verifying that it yields the same result.  This can
+be reasonably done in any number of ways; for example, a library could compute
+an ordinary checksum of the data and just sign the result in order to get the
+tamper-resistance advantages of the secret signing key (since otherwise an
+attacker could reliably overwrite both the data and the checksum).
+
+``value1`` and ``value2`` must be either pointers or integers.  If the integers
+are larger than ``uintptr_t`` then data not representable in ``uintptr_t`` may
+be discarded.
+
+The result will have type ``ptrauth_generic_signature_t``, which is an integer
+type.  Implementations are not required to make all bits of the result equally
+significant; in particular, some implementations are known to not leave
+meaningful data in the low bits.
+
+
+
+Alternative Implementations
+---------------------------
+
+Signature Storage
+~~~~~~~~~~~~~~~~~
+
+It is not critical for the security of pointer authentication that the
+signature be stored "together" with the pointer, as it is in Armv8.3. An
+implementation could just as well store the signature in a separate word, so
+that the ``sizeof`` a signed pointer would be larger than the ``sizeof`` a raw
+pointer.
+
+Storing the signature in the high bits, as Armv8.3 does, has several trade-offs:
+
+- Disadvantage: there are substantially fewer bits available for the signature,
+  weakening the mitigation by making it much easier for an attacker to simply
+  guess the correct signature.
+
+- Disadvantage: future growth of the address space will necessarily further
+  weaken the mitigation.
+
+- Advantage: memory layouts don't change, so it's possible for
+  pointer-authentication-enabled code (for example, in a system library) to
+  efficiently interoperate with existing code, as long as pointer
+  authentication can be disabled dynamically.
+
+- Advantage: the size of a signed pointer doesn't grow, which might
+  significantly increase memory requirements, code size, and register pressure.
+
+- Advantage: the size of a signed pointer is the same as a raw pointer, so
+  generic APIs which work in types like `void *` (such as `dlsym`) can still
+  return signed pointers.  This means that clients of these APIs will not
+  require insecure code in order to correctly receive a function pointer.
+
+Hashing vs. Encrypting Pointers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Armv8.3 implements ``sign`` by computing a cryptographic hash and storing that
+in the spare bits of the pointer.  This means that there are relatively few
+possible values for the valid signed pointer, since the bits corresponding to
+the raw pointer are known.  Together with an ``auth`` oracle, this can make it
+computationally feasible to discover the correct signature with brute force.
+(The implementation should of course endeavor not to introduce ``auth``
+oracles, but this can be difficult, and attackers can be devious.)
+
+If the implementation can instead *encrypt* the pointer during ``sign`` and
+*decrypt* it during ``auth``, this brute-force attack becomes far less
+feasible, even with an ``auth`` oracle.  However, there are several problems
+with this idea:
+
+- It's unclear whether this kind of encryption is even possible without
+  increasing the storage size of a signed pointer.  If the storage size can be
+  increased, brute-force atacks can be equally well mitigated by simply storing
+  a larger signature.
+
+- It would likely be impossible to implement a ``strip`` operation, which might
+  make debuggers and other out-of-process tools far more difficult to write, as
+  well as generally making primitive debugging more challenging.
+
+- Implementations can benefit from being able to extract the raw pointer
+  immediately from a signed pointer.  An Armv8.3 processor executing an
+  ``auth``-and-load instruction can perform the load and ``auth`` in parallel;
+  a processor which instead encrypted the pointer would be forced to perform
+  these operations serially.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0a963d1..623a4b3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -191,6 +191,9 @@ Removed Compiler Flags
 -------------------------
 
 - The ``-freroll-loops`` flag has been removed. It had no effect since Clang 13.
+- ``-m[no-]unaligned-access`` is removed for RISC-V and LoongArch.
+  ``-m[no-]strict-align``, also supported by GCC, should be used instead.
+  (`#85350 <https://github.com/llvm/llvm-project/pull/85350>`_.)
 
 Attribute Changes in Clang
 --------------------------
@@ -376,6 +379,8 @@ Bug Fixes to C++ Support
 - Fixed a crash in constant evaluation when trying to access a
   captured ``this`` pointer in a lambda with an explicit object parameter.
   Fixes (#GH80997)
+- Fix an issue where missing set friend declaration in template class instantiation.
+  Fixes (#GH84368).
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -490,6 +495,10 @@ libclang
 Static Analyzer
 ---------------
 
+- Fixed crashing on loops if the loop variable was declared in switch blocks
+  but not under any case blocks if ``unroll-loops=true`` analyzer config is
+  set. (#GH68819)
+
 New features
 ^^^^^^^^^^^^
 
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 1942b0e..1091605 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2244,6 +2244,7 @@ public:
   bool isFloatingType() const;     // C99 6.2.5p11 (real floating + complex)
   bool isHalfType() const;         // OpenCL 6.1.1.1, NEON (IEEE 754-2008 half)
   bool isFloat16Type() const;      // C11 extension ISO/IEC TS 18661
+  bool isFloat32Type() const;
   bool isBFloat16Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
@@ -7452,6 +7453,10 @@ inline bool Type::isFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::Float16);
 }
 
+inline bool Type::isFloat32Type() const {
+  return isSpecificBuiltinType(BuiltinType::Float);
+}
+
 inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 9c70337..eae41b5 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4354,6 +4354,43 @@ def CoroSuspend : CoroLangBuiltin {
   let Prototype = "char(_Constant bool)";
 }
 
+// Pointer authentication builtins.
+def PtrauthStrip : Builtin {
+  let Spellings = ["__builtin_ptrauth_strip"];
+  let Attributes = [CustomTypeChecking, NoThrow, Const];
+  let Prototype = "void*(void*,int)";
+}
+
+def PtrauthBlendDiscriminator : Builtin {
+  let Spellings = ["__builtin_ptrauth_blend_discriminator"];
+  let Attributes = [CustomTypeChecking, NoThrow, Const];
+  let Prototype = "size_t(void*,int)";
+}
+
+def PtrauthSignUnauthenticated : Builtin {
+  let Spellings = ["__builtin_ptrauth_sign_unauthenticated"];
+  let Attributes = [CustomTypeChecking, NoThrow, Const];
+  let Prototype = "void*(void*,int,void*)";
+}
+
+def PtrauthSignGenericData : Builtin {
+  let Spellings = ["__builtin_ptrauth_sign_generic_data"];
+  let Attributes = [CustomTypeChecking, NoThrow, Const];
+  let Prototype = "size_t(void*,void*)";
+}
+
+def PtrauthAuthAndResign : Builtin {
+  let Spellings = ["__builtin_ptrauth_auth_and_resign"];
+  let Attributes = [CustomTypeChecking, NoThrow];
+  let Prototype = "void*(void*,int,void*,int,void*)";
+}
+
+def PtrauthAuth : Builtin {
+  let Spellings = ["__builtin_ptrauth_auth"];
+  let Attributes = [CustomTypeChecking, NoThrow];
+  let Prototype = "void*(void*,int,void*)";
+}
+
 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions.
 // We need the generic prototype, since the packet type could be anything.
 def ReadPipe : OCLPipeLangBuiltin {
@@ -4572,6 +4609,12 @@ def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_isinf"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_lerp"];
   let Attributes = [NoThrow, Const];
@@ -4590,6 +4633,12 @@ def HLSLRcp : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLRSqrt : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_rsqrt"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index b5cbe90..cf8711c 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -50,6 +50,9 @@ BUILTIN(__builtin_arm_wfi, "v", "")
 BUILTIN(__builtin_arm_sev, "v", "")
 BUILTIN(__builtin_arm_sevl, "v", "")
 
+// Like __builtin_trap but provide an 16-bit immediate reason code (which goes into `brk #N`).
+BUILTIN(__builtin_arm_trap, "vUIs", "nr")
+
 // CRC32
 TARGET_BUILTIN(__builtin_arm_crc32b, "UiUiUc", "nc", "crc")
 TARGET_BUILTIN(__builtin_arm_crc32cb, "UiUiUc", "nc", "crc")
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 3f14167..f5ff891 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -875,6 +875,7 @@ def ZeroLengthArray : DiagGroup<"zero-length-array">;
 def GNUZeroLineDirective : DiagGroup<"gnu-zero-line-directive">;
 def GNUZeroVariadicMacroArguments : DiagGroup<"gnu-zero-variadic-macro-arguments">;
 def MisleadingIndentation : DiagGroup<"misleading-indentation">;
+def PtrAuthNullPointers : DiagGroup<"ptrauth-null-pointers">;
 
 // This covers both the deprecated case (in C++98)
 // and the extension case (in C++11 onwards).
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d7ab163..8e97902 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -911,6 +911,22 @@ def warn_fortify_scanf_overflow : Warning<
 def err_function_start_invalid_type: Error<
   "argument must be a function">;
 
+def err_ptrauth_disabled :
+  Error<"this target does not support pointer authentication">;
+def err_ptrauth_invalid_key :
+  Error<"%0 does not identify a valid pointer authentication key for "
+        "the current target">;
+def err_ptrauth_value_bad_type :
+  Error<"%select{signed value|extra discriminator|blended pointer|blended "
+        "integer}0 must have %select{pointer|integer|pointer or integer}1 "
+        "type; type here is %2">;
+def warn_ptrauth_sign_null_pointer :
+  Warning<"signing a null pointer will yield a non-null pointer">,
+  InGroup<PtrAuthNullPointers>;
+def warn_ptrauth_auth_null_pointer :
+  Warning<"authenticating a null pointer will almost certainly trap">,
+  InGroup<PtrAuthNullPointers>;
+
 /// main()
 // static main() is not an error in C, just in C++.
 def warn_static_main : Warning<"'main' should not be declared static">,
diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
index 5fad5fc..eeed5f4 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -101,6 +101,7 @@ FEATURE(memory_sanitizer,
 FEATURE(thread_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Thread))
 FEATURE(dataflow_sanitizer, LangOpts.Sanitize.has(SanitizerKind::DataFlow))
 FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))
+FEATURE(ptrauth_intrinsics, LangOpts.PointerAuthIntrinsics)
 FEATURE(swiftasynccc,
   PP.getTargetInfo().checkCallingConvention(CC_SwiftAsync) ==
   clang::TargetInfo::CCCR_OK)
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 472fd9f..8ef6700 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -161,6 +161,8 @@ LANGOPT(DllExportInlines  , 1, 1, "dllexported classes dllexport inline methods"
 LANGOPT(RelaxedTemplateTemplateArgs, 1, 0, "C++17 relaxed matching of template template arguments")
 LANGOPT(ExperimentalLibrary, 1, 0, "enable unstable and experimental library features")
 
+LANGOPT(PointerAuthIntrinsics, 1, 0, "pointer authentication intrinsics")
+
 LANGOPT(DoubleSquareBracketAttributes, 1, 0, "'[[]]' attributes extension for all language standard modes")
 
 COMPATIBLE_LANGOPT(RecoveryAST, 1, 1, "Preserve expressions in AST when encountering errors")
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 7682f84..374595e 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -24,6 +24,7 @@
 #include "clang/Basic/TargetOptions.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/SmallSet.h"
@@ -1571,6 +1572,11 @@ public:
     return getAddressSpaceMap()[(unsigned)AS];
   }
 
+  /// Determine whether the given pointer-authentication key is valid.
+  ///
+  /// The value has been coerced to type 'int'.
+  virtual bool validatePointerAuthKey(const llvm::APSInt &value) const;
+
   /// Map from the address space field in builtin description strings to the
   /// language address space.
   virtual LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const {
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a7e43b4..acb7592 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4106,6 +4106,14 @@ defm strict_return : BoolFOption<"strict-return",
             " of a non-void function as unreachable">,
   PosFlag<SetTrue>>;
 
+let Group = f_Group in {
+  let Visibility = [ClangOption,CC1Option] in {
+    def fptrauth_intrinsics : Flag<["-"], "fptrauth-intrinsics">,
+      HelpText<"Enable pointer authentication intrinsics">;
+  }
+  def fno_ptrauth_intrinsics : Flag<["-"], "fno-ptrauth-intrinsics">;
+}
+
 def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>,
     Visibility<[ClangOption, CC1Option]>,
     HelpText<"Enable matrix data type and related builtin functions">,
@@ -4696,21 +4704,17 @@ def mrvv_vector_bits_EQ : Joined<["-"], "mrvv-vector-bits=">, Group<m_Group>,
     " (RISC-V only)")>;
 
 def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_Group>,
-  HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64/LoongArch/RISC-V only)">;
+  HelpText<"Allow memory accesses to be unaligned (AArch32 only)">;
 def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_Group>,
-  HelpText<"Force all memory accesses to be aligned (AArch32/AArch64/LoongArch/RISC-V only)">;
+  HelpText<"Force all memory accesses to be aligned (AArch32 only)">;
 def munaligned_symbols : Flag<["-"], "munaligned-symbols">, Group<m_Group>,
   HelpText<"Expect external char-aligned symbols to be without ABI alignment (SystemZ only)">;
 def mno_unaligned_symbols : Flag<["-"], "mno-unaligned-symbols">, Group<m_Group>,
   HelpText<"Expect external char-aligned symbols to be without ABI alignment (SystemZ only)">;
-} // let Flags = [TargetSpecific]
-def mstrict_align : Flag<["-"], "mstrict-align">, Alias<mno_unaligned_access>,
-  Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>,
-  HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">;
-def mno_strict_align : Flag<["-"], "mno-strict-align">, Alias<munaligned_access>,
-  Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>,
-  HelpText<"Allow memory accesses to be unaligned (same as munaligned-access)">;
-let Flags = [TargetSpecific] in {
+def mstrict_align : Flag<["-"], "mstrict-align">,
+  HelpText<"Force all memory accesses to be aligned (AArch64/LoongArch/RISC-V only)">;
+def mno_strict_align : Flag<["-"], "mno-strict-align">,
+  HelpText<"Allow memory accesses to be unaligned (AArch64/LoongArch/RISC-V only)">;
 def mno_thumb : Flag<["-"], "mno-thumb">, Group<m_arm_Features_Group>;
 def mrestrict_it: Flag<["-"], "mrestrict-it">, Group<m_arm_Features_Group>,
   HelpText<"Disallow generation of complex IT blocks. It is off by default.">;
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index fbe2e8f..a4f9cad 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -580,7 +580,7 @@ public:
 
   // Return the DWARF version to emit, in the absence of arguments
   // to the contrary.
-  virtual unsigned GetDefaultDwarfVersion() const;
+  virtual unsigned GetDefaultDwarfVersion() const { return 5; }
 
   // Some toolchains may have different restrictions on the DWARF version and
   // may need to adjust it. E.g. NVPTX may need to enforce DWARF2 even when host
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 590297f..a72c1b1 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -5228,6 +5228,9 @@ extern const char *DefaultFormatStyle;
 /// Different builds can modify the value to the preferred styles.
 extern const char *DefaultFallbackStyle;
 
+/// Whether the language is C/C++/Objective-C/Objective-C++.
+extern bool IsCpp;
+
 /// Construct a FormatStyle based on ``StyleName``.
 ///
 /// ``StyleName`` can take several forms:
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 69d5709..95ea5eb 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3060,6 +3060,8 @@ public:
                                     TemplateIdAnnotation *TemplateId,
                                     bool IsMemberSpecialization);
 
+  bool checkConstantPointerAuthKey(Expr *keyExpr, unsigned &key);
+
   void DiagnoseFunctionSpecifiers(const DeclSpec &DS);
   NamedDecl *getShadowedDeclaration(const TypedefNameDecl *D,
                                     const LookupResult &R);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b154a19..7137efb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11858,8 +11858,8 @@ static QualType getObjectType(APValue::LValueBase B) {
 static const Expr *ignorePointerCastsAndParens(const Expr *E) {
   assert(E->isPRValue() && E->getType()->hasPointerRepresentation());
 
-  auto *NoParens = E->IgnoreParens();
-  auto *Cast = dyn_cast<CastExpr>(NoParens);
+  const Expr *NoParens = E->IgnoreParens();
+  const auto *Cast = dyn_cast<CastExpr>(NoParens);
   if (Cast == nullptr)
     return NoParens;
 
@@ -11870,7 +11870,7 @@ static const Expr *ignorePointerCastsAndParens(const Expr *E) {
       CastKind != CK_AddressSpaceConversion)
     return NoParens;
 
-  auto *SubExpr = Cast->getSubExpr();
+  const auto *SubExpr = Cast->getSubExpr();
   if (!SubExpr->getType()->hasPointerRepresentation() || !SubExpr->isPRValue())
     return NoParens;
   return ignorePointerCastsAndParens(SubExpr);
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 86304a5..f07e430 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -82,15 +82,27 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
     if (DiscardResult)
       return this->discard(SubExpr);
 
-    if (SubExpr->getType()->isAnyComplexType())
-      return this->delegate(SubExpr);
+    std::optional<PrimType> SubExprT = classify(SubExpr->getType());
+    // Prepare storage for the result.
+    if (!Initializing && !SubExprT) {
+      std::optional<unsigned> LocalIndex =
+          allocateLocal(SubExpr, /*IsExtended=*/false);
+      if (!LocalIndex)
+        return false;
+      if (!this->emitGetPtrLocal(*LocalIndex, CE))
+        return false;
+    }
 
     if (!this->visit(SubExpr))
       return false;
 
-    if (std::optional<PrimType> SubExprT = classify(SubExpr->getType()))
+    if (SubExprT)
       return this->emitLoadPop(*SubExprT, CE);
-    return false;
+
+    // If the subexpr type is not primitive, we need to perform a copy here.
+    // This happens for example in C when dereferencing a pointer of struct
+    // type.
+    return this->emitMemcpy(CE);
   }
 
   case CK_UncheckedDerivedToBase:
@@ -296,14 +308,11 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
     // Location for the SubExpr.
     // Since SubExpr is of complex type, visiting it results in a pointer
     // anyway, so we just create a temporary pointer variable.
-    std::optional<unsigned> SubExprOffset = allocateLocalPrimitive(
+    unsigned SubExprOffset = allocateLocalPrimitive(
         SubExpr, PT_Ptr, /*IsConst=*/true, /*IsExtended=*/false);
-    if (!SubExprOffset)
-      return false;
-
     if (!this->visit(SubExpr))
       return false;
-    if (!this->emitSetLocal(PT_Ptr, *SubExprOffset, CE))
+    if (!this->emitSetLocal(PT_Ptr, SubExprOffset, CE))
       return false;
 
     PrimType SourceElemT = classifyComplexElementType(SubExpr->getType());
@@ -312,7 +321,7 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
     PrimType DestElemT = classifyPrim(DestElemType);
     // Cast both elements individually.
     for (unsigned I = 0; I != 2; ++I) {
-      if (!this->emitGetLocal(PT_Ptr, *SubExprOffset, CE))
+      if (!this->emitGetLocal(PT_Ptr, SubExprOffset, CE))
         return false;
       if (!this->emitArrayElemPop(SourceElemT, I, CE))
         return false;
@@ -676,11 +685,17 @@ bool ByteCodeExprGen<Emitter>::VisitComplexBinOp(const BinaryOperator *E) {
     if (!this->emitSetLocal(PT_Ptr, ResultOffset, E))
       return false;
   }
+  QualType LHSType = LHS->getType();
+  if (const auto *AT = LHSType->getAs<AtomicType>())
+    LHSType = AT->getValueType();
+  QualType RHSType = RHS->getType();
+  if (const auto *AT = RHSType->getAs<AtomicType>())
+    RHSType = AT->getValueType();
 
   // Evaluate LHS and save value to LHSOffset.
   bool LHSIsComplex;
   unsigned LHSOffset;
-  if (LHS->getType()->isAnyComplexType()) {
+  if (LHSType->isAnyComplexType()) {
     LHSIsComplex = true;
     LHSOffset = this->allocateLocalPrimitive(LHS, PT_Ptr, true, false);
     if (!this->visit(LHS))
@@ -689,7 +704,7 @@ bool ByteCodeExprGen<Emitter>::VisitComplexBinOp(const BinaryOperator *E) {
       return false;
   } else {
     LHSIsComplex = false;
-    PrimType LHST = classifyPrim(LHS->getType());
+    PrimType LHST = classifyPrim(LHSType);
     LHSOffset = this->allocateLocalPrimitive(LHS, LHST, true, false);
     if (!this->visit(LHS))
       return false;
@@ -700,7 +715,7 @@ bool ByteCodeExprGen<Emitter>::VisitComplexBinOp(const BinaryOperator *E) {
   // Same with RHS.
   bool RHSIsComplex;
   unsigned RHSOffset;
-  if (RHS->getType()->isAnyComplexType()) {
+  if (RHSType->isAnyComplexType()) {
     RHSIsComplex = true;
     RHSOffset = this->allocateLocalPrimitive(RHS, PT_Ptr, true, false);
     if (!this->visit(RHS))
@@ -709,7 +724,7 @@ bool ByteCodeExprGen<Emitter>::VisitComplexBinOp(const BinaryOperator *E) {
       return false;
   } else {
     RHSIsComplex = false;
-    PrimType RHST = classifyPrim(RHS->getType());
+    PrimType RHST = classifyPrim(RHSType);
     RHSOffset = this->allocateLocalPrimitive(RHS, RHST, true, false);
     if (!this->visit(RHS))
       return false;
@@ -1233,22 +1248,19 @@ bool ByteCodeExprGen<Emitter>::VisitOpaqueValueExpr(const OpaqueValueExpr *E) {
   // At this point we either have the evaluated source expression or a pointer
   // to an object on the stack. We want to create a local variable that stores
   // this value.
-  std::optional<unsigned> LocalIndex =
-      allocateLocalPrimitive(E, SubExprT, /*IsConst=*/true);
-  if (!LocalIndex)
-    return false;
-  if (!this->emitSetLocal(SubExprT, *LocalIndex, E))
+  unsigned LocalIndex = allocateLocalPrimitive(E, SubExprT, /*IsConst=*/true);
+  if (!this->emitSetLocal(SubExprT, LocalIndex, E))
     return false;
 
   // Here the local variable is created but the value is removed from the stack,
   // so we put it back if the caller needs it.
   if (!DiscardResult) {
-    if (!this->emitGetLocal(SubExprT, *LocalIndex, E))
+    if (!this->emitGetLocal(SubExprT, LocalIndex, E))
       return false;
   }
 
   // This is cleaned up when the local variable is destroyed.
-  OpaqueExprs.insert({E, *LocalIndex});
+  OpaqueExprs.insert({E, LocalIndex});
 
   return true;
 }
@@ -1642,14 +1654,13 @@ bool ByteCodeExprGen<Emitter>::VisitMaterializeTemporaryExpr(
 
   // For everyhing else, use local variables.
   if (SubExprT) {
-    if (std::optional<unsigned> LocalIndex = allocateLocalPrimitive(
-            SubExpr, *SubExprT, /*IsConst=*/true, /*IsExtended=*/true)) {
-      if (!this->visit(SubExpr))
-        return false;
-      if (!this->emitSetLocal(*SubExprT, *LocalIndex, E))
-        return false;
-      return this->emitGetPtrLocal(*LocalIndex, E);
-    }
+    unsigned LocalIndex = allocateLocalPrimitive(
+        SubExpr, *SubExprT, /*IsConst=*/true, /*IsExtended=*/true);
+    if (!this->visit(SubExpr))
+      return false;
+    if (!this->emitSetLocal(*SubExprT, LocalIndex, E))
+      return false;
+    return this->emitGetPtrLocal(LocalIndex, E);
   } else {
     const Expr *Inner = E->getSubExpr()->skipRValueSubobjectAdjustments();
 
@@ -2215,6 +2226,12 @@ bool ByteCodeExprGen<Emitter>::VisitPseudoObjectExpr(
   return true;
 }
 
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::VisitPackIndexingExpr(
+    const PackIndexingExpr *E) {
+  return this->delegate(E->getSelectedExpr());
+}
+
 template <class Emitter> bool ByteCodeExprGen<Emitter>::discard(const Expr *E) {
   if (E->containsErrors())
     return false;
@@ -2227,7 +2244,7 @@ template <class Emitter> bool ByteCodeExprGen<Emitter>::discard(const Expr *E) {
 template <class Emitter>
 bool ByteCodeExprGen<Emitter>::delegate(const Expr *E) {
   if (E->containsErrors())
-    return false;
+    return this->emitError(E);
 
   // We're basically doing:
   // OptionScope<Emitter> Scope(this, DicardResult, Initializing);
@@ -2237,7 +2254,7 @@ bool ByteCodeExprGen<Emitter>::delegate(const Expr *E) {
 
 template <class Emitter> bool ByteCodeExprGen<Emitter>::visit(const Expr *E) {
   if (E->containsErrors())
-    return false;
+    return this->emitError(E);
 
   if (E->getType()->isVoidType())
     return this->discard(E);
@@ -2266,7 +2283,7 @@ bool ByteCodeExprGen<Emitter>::visitInitializer(const Expr *E) {
   assert(!classify(E->getType()));
 
   if (E->containsErrors())
-    return false;
+    return this->emitError(E);
 
   OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
                              /*NewInitializing=*/true);
@@ -2660,18 +2677,12 @@ bool ByteCodeExprGen<Emitter>::visitVarDecl(const VarDecl *VD) {
     if (P.getGlobal(VD))
       return true;
 
-    // Ignore external declarations. We will instead emit a dummy
-    // pointer when we see a DeclRefExpr for them.
-    if (VD->hasExternalStorage())
-      return true;
-
     std::optional<unsigned> GlobalIndex = P.createGlobal(VD, Init);
 
     if (!GlobalIndex)
       return false;
 
-    assert(Init);
-    {
+    if (Init) {
       DeclScope<Emitter> LocalScope(this, VD);
 
       if (VarT) {
@@ -2681,6 +2692,7 @@ bool ByteCodeExprGen<Emitter>::visitVarDecl(const VarDecl *VD) {
       }
       return this->visitGlobalInitializer(Init, *GlobalIndex);
     }
+    return true;
   } else {
     VariableScope<Emitter> LocalScope(this);
     if (VarT) {
@@ -3247,53 +3259,20 @@ bool ByteCodeExprGen<Emitter>::VisitDeclRefExpr(const DeclRefExpr *E) {
   // pointer to the actual value) instead of a pointer to the pointer to the
   // value.
   bool IsReference = D->getType()->isReferenceType();
-  // Complex values are copied in the AST via a simply assignment or
-  // ltor cast. But we represent them as two-element arrays, which means
-  // we pass them around as pointers. So, to assignm from them, we will
-  // have to copy both (primitive) elements instead.
-  bool IsComplex = D->getType()->isAnyComplexType();
 
   // Check for local/global variables and parameters.
   if (auto It = Locals.find(D); It != Locals.end()) {
     const unsigned Offset = It->second.Offset;
-    // FIXME: Fix the code duplication here with the code in the global case.
-    if (Initializing && IsComplex) {
-      PrimType ElemT = classifyComplexElementType(D->getType());
-      for (unsigned I = 0; I != 2; ++I) {
-        if (!this->emitGetPtrLocal(Offset, E))
-          return false;
-        if (!this->emitArrayElemPop(ElemT, I, E))
-          return false;
-        if (!this->emitInitElem(ElemT, I, E))
-          return false;
-      }
-      return true;
-    }
-
     if (IsReference)
       return this->emitGetLocal(PT_Ptr, Offset, E);
     return this->emitGetPtrLocal(Offset, E);
   } else if (auto GlobalIndex = P.getGlobal(D)) {
-    if (Initializing && IsComplex) {
-      PrimType ElemT = classifyComplexElementType(D->getType());
-      for (unsigned I = 0; I != 2; ++I) {
-        if (!this->emitGetPtrGlobal(*GlobalIndex, E))
-          return false;
-        if (!this->emitArrayElemPop(ElemT, I, E))
-          return false;
-        if (!this->emitInitElem(ElemT, I, E))
-          return false;
-      }
-      return true;
-    }
-
     if (IsReference)
       return this->emitGetGlobalPtr(*GlobalIndex, E);
 
     return this->emitGetPtrGlobal(*GlobalIndex, E);
   } else if (const auto *PVD = dyn_cast<ParmVarDecl>(D)) {
     if (auto It = this->Params.find(PVD); It != this->Params.end()) {
-      // FIXME: _Complex initializing case?
       if (IsReference || !It->second.IsPtr)
         return this->emitGetParamPtr(It->second.Offset, E);
 
@@ -3322,9 +3301,6 @@ bool ByteCodeExprGen<Emitter>::VisitDeclRefExpr(const DeclRefExpr *E) {
         // Retry.
         return this->VisitDeclRefExpr(E);
       }
-
-      if (VD->hasExternalStorage())
-        return this->emitInvalidDeclRef(E, E);
     }
   } else {
     if (const auto *VD = dyn_cast<VarDecl>(D);
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 5ad2e74..969598c 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -119,6 +119,7 @@ public:
   bool VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E);
   bool VisitCXXRewrittenBinaryOperator(const CXXRewrittenBinaryOperator *E);
   bool VisitPseudoObjectExpr(const PseudoObjectExpr *E);
+  bool VisitPackIndexingExpr(const PackIndexingExpr *E);
 
 protected:
   bool visitExpr(const Expr *E) override;
diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp
index 6da3860..675063e 100644
--- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp
@@ -273,15 +273,18 @@ bool ByteCodeStmtGen<Emitter>::visitStmt(const Stmt *S) {
     return visitCaseStmt(cast<CaseStmt>(S));
   case Stmt::DefaultStmtClass:
     return visitDefaultStmt(cast<DefaultStmt>(S));
-  case Stmt::GCCAsmStmtClass:
-  case Stmt::MSAsmStmtClass:
-    return visitAsmStmt(cast<AsmStmt>(S));
   case Stmt::AttributedStmtClass:
     return visitAttributedStmt(cast<AttributedStmt>(S));
   case Stmt::CXXTryStmtClass:
     return visitCXXTryStmt(cast<CXXTryStmt>(S));
   case Stmt::NullStmtClass:
     return true;
+  // Always invalid statements.
+  case Stmt::GCCAsmStmtClass:
+  case Stmt::MSAsmStmtClass:
+  case Stmt::GotoStmtClass:
+  case Stmt::LabelStmtClass:
+    return this->emitInvalid(S);
   default: {
     if (auto *Exp = dyn_cast<Expr>(S))
       return this->discard(Exp);
@@ -420,6 +423,11 @@ bool ByteCodeStmtGen<Emitter>::visitWhileStmt(const WhileStmt *S) {
   LoopScope<Emitter> LS(this, EndLabel, CondLabel);
 
   this->emitLabel(CondLabel);
+
+  if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
+    if (!visitDeclStmt(CondDecl))
+      return false;
+
   if (!this->visitBool(Cond))
     return false;
   if (!this->jumpFalse(EndLabel))
@@ -484,6 +492,10 @@ bool ByteCodeStmtGen<Emitter>::visitForStmt(const ForStmt *S) {
   if (Init && !this->visitStmt(Init))
     return false;
   this->emitLabel(CondLabel);
+
+  if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
+    if (!visitDeclStmt(CondDecl))
+      return false;
   if (Cond) {
     if (!this->visitBool(Cond))
       return false;
@@ -582,17 +594,21 @@ bool ByteCodeStmtGen<Emitter>::visitContinueStmt(const ContinueStmt *S) {
 template <class Emitter>
 bool ByteCodeStmtGen<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
   const Expr *Cond = S->getCond();
-  PrimType CondT = this->classifyPrim(Cond->getType());
 
   LabelTy EndLabel = this->getLabel();
   OptLabelTy DefaultLabel = std::nullopt;
-  unsigned CondVar = this->allocateLocalPrimitive(Cond, CondT, true, false);
 
   if (const auto *CondInit = S->getInit())
     if (!visitStmt(CondInit))
       return false;
 
+  if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
+    if (!visitDeclStmt(CondDecl))
+      return false;
+
   // Initialize condition variable.
+  PrimType CondT = this->classifyPrim(Cond->getType());
+  unsigned CondVar = this->allocateLocalPrimitive(Cond, CondT, true, false);
   if (!this->visit(Cond))
     return false;
   if (!this->emitSetLocal(CondT, CondVar, S))
@@ -658,11 +674,6 @@ bool ByteCodeStmtGen<Emitter>::visitDefaultStmt(const DefaultStmt *S) {
 }
 
 template <class Emitter>
-bool ByteCodeStmtGen<Emitter>::visitAsmStmt(const AsmStmt *S) {
-  return this->emitInvalid(S);
-}
-
-template <class Emitter>
 bool ByteCodeStmtGen<Emitter>::visitAttributedStmt(const AttributedStmt *S) {
   // Ignore all attributes.
   return this->visitStmt(S->getSubStmt());
diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.h b/clang/lib/AST/Interp/ByteCodeStmtGen.h
index 64e0358..ab7a591 100644
--- a/clang/lib/AST/Interp/ByteCodeStmtGen.h
+++ b/clang/lib/AST/Interp/ByteCodeStmtGen.h
@@ -63,7 +63,6 @@ private:
   bool visitSwitchStmt(const SwitchStmt *S);
   bool visitCaseStmt(const CaseStmt *S);
   bool visitDefaultStmt(const DefaultStmt *S);
-  bool visitAsmStmt(const AsmStmt *S);
   bool visitAttributedStmt(const AttributedStmt *S);
   bool visitCXXTryStmt(const CXXTryStmt *S);
 
diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp
index ce7ed9c..a4ccc02 100644
--- a/clang/lib/AST/Interp/Descriptor.cpp
+++ b/clang/lib/AST/Interp/Descriptor.cpp
@@ -233,9 +233,10 @@ static BlockMoveFn getMoveArrayPrim(PrimType Type) {
 Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD,
                        bool IsConst, bool IsTemporary, bool IsMutable)
     : Source(D), ElemSize(primSize(Type)), Size(ElemSize),
-      MDSize(MD.value_or(0)), AllocSize(align(Size + MDSize)), IsConst(IsConst),
-      IsMutable(IsMutable), IsTemporary(IsTemporary), CtorFn(getCtorPrim(Type)),
-      DtorFn(getDtorPrim(Type)), MoveFn(getMovePrim(Type)) {
+      MDSize(MD.value_or(0)), AllocSize(align(Size + MDSize)), PrimT(Type),
+      IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary),
+      CtorFn(getCtorPrim(Type)), DtorFn(getDtorPrim(Type)),
+      MoveFn(getMovePrim(Type)) {
   assert(AllocSize >= Size);
   assert(Source && "Missing source");
 }
@@ -246,7 +247,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD,
                        bool IsMutable)
     : Source(D), ElemSize(primSize(Type)), Size(ElemSize * NumElems),
       MDSize(MD.value_or(0)),
-      AllocSize(align(MDSize) + align(Size) + sizeof(InitMapPtr)),
+      AllocSize(align(MDSize) + align(Size) + sizeof(InitMapPtr)), PrimT(Type),
       IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary),
       IsArray(true), CtorFn(getCtorArrayPrim(Type)),
       DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) {
@@ -300,10 +301,19 @@ Descriptor::Descriptor(const DeclTy &D, const Record *R, MetadataSize MD,
   assert(Source && "Missing source");
 }
 
-Descriptor::Descriptor(const DeclTy &D, MetadataSize MD)
-    : Source(D), ElemSize(1), Size(ElemSize), MDSize(MD.value_or(0)),
-      AllocSize(Size + MDSize), ElemRecord(nullptr), IsConst(true),
-      IsMutable(false), IsTemporary(false), IsDummy(true) {
+/// Dummy.
+Descriptor::Descriptor(const DeclTy &D)
+    : Source(D), ElemSize(1), Size(1), MDSize(0), AllocSize(MDSize),
+      ElemRecord(nullptr), IsConst(true), IsMutable(false), IsTemporary(false),
+      IsDummy(true) {
+  assert(Source && "Missing source");
+}
+
+/// Dummy array.
+Descriptor::Descriptor(const DeclTy &D, UnknownSize)
+    : Source(D), ElemSize(1), Size(UnknownSizeMark), MDSize(0),
+      AllocSize(MDSize), ElemRecord(nullptr), IsConst(true), IsMutable(false),
+      IsTemporary(false), IsArray(true), IsDummy(true) {
   assert(Source && "Missing source");
 }
 
diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h
index 0f64d67..4e25736 100644
--- a/clang/lib/AST/Interp/Descriptor.h
+++ b/clang/lib/AST/Interp/Descriptor.h
@@ -112,6 +112,10 @@ public:
   const Record *const ElemRecord = nullptr;
   /// Descriptor of the array element.
   const Descriptor *const ElemDesc = nullptr;
+  /// The primitive type this descriptor was created for,
+  /// or the primitive element type in case this is
+  /// a primitive array.
+  const std::optional<PrimType> PrimT = std::nullopt;
   /// Flag indicating if the block is mutable.
   const bool IsConst = false;
   /// Flag indicating if a field is mutable.
@@ -152,7 +156,11 @@ public:
   Descriptor(const DeclTy &D, const Record *R, MetadataSize MD, bool IsConst,
              bool IsTemporary, bool IsMutable);
 
-  Descriptor(const DeclTy &D, MetadataSize MD);
+  /// Allocates a dummy descriptor.
+  Descriptor(const DeclTy &D);
+
+  /// Allocates a dummy array descriptor.
+  Descriptor(const DeclTy &D, UnknownSize);
 
   QualType getType() const;
   QualType getElemQualType() const;
@@ -183,6 +191,11 @@ public:
     return Size;
   }
 
+  PrimType getPrimType() const {
+    assert(isPrimitiveArray() || isPrimitive());
+    return *PrimT;
+  }
+
   /// Returns the allocated size, including metadata.
   unsigned getAllocSize() const { return AllocSize; }
   /// returns the size of an element when the structure is viewed as an array.
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index 315ddb2..160701f 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -10,8 +10,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Boolean.h"
 #include "Floating.h"
 #include "Function.h"
+#include "FunctionPointer.h"
+#include "Integral.h"
 #include "IntegralAP.h"
 #include "Opcode.h"
 #include "PrimType.h"
@@ -86,6 +89,40 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
 
 LLVM_DUMP_METHOD void Program::dump() const { dump(llvm::errs()); }
 
+static const char *primTypeToString(PrimType T) {
+  switch (T) {
+  case PT_Sint8:
+    return "Sint8";
+  case PT_Uint8:
+    return "Uint8";
+  case PT_Sint16:
+    return "Sint16";
+  case PT_Uint16:
+    return "Uint16";
+  case PT_Sint32:
+    return "Sint32";
+  case PT_Uint32:
+    return "Uint32";
+  case PT_Sint64:
+    return "Sint64";
+  case PT_Uint64:
+    return "Uint64";
+  case PT_IntAP:
+    return "IntAP";
+  case PT_IntAPS:
+    return "IntAPS";
+  case PT_Bool:
+    return "Bool";
+  case PT_Float:
+    return "Float";
+  case PT_Ptr:
+    return "Ptr";
+  case PT_FnPtr:
+    return "FnPtr";
+  }
+  llvm_unreachable("Unhandled PrimType");
+}
+
 LLVM_DUMP_METHOD void Program::dump(llvm::raw_ostream &OS) const {
   {
     ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_RED, true});
@@ -100,9 +137,10 @@ LLVM_DUMP_METHOD void Program::dump(llvm::raw_ostream &OS) const {
   unsigned GI = 0;
   for (const Global *G : Globals) {
     const Descriptor *Desc = G->block()->getDescriptor();
+    Pointer GP = getPtrGlobal(GI);
+
     OS << GI << ": " << (void *)G->block() << " ";
     {
-      Pointer GP = getPtrGlobal(GI);
       ColorScope SC(OS, true,
                     GP.isInitialized()
                         ? TerminalColor{llvm::raw_ostream::GREEN, false}
@@ -111,6 +149,15 @@ LLVM_DUMP_METHOD void Program::dump(llvm::raw_ostream &OS) const {
     }
     Desc->dump(OS);
     OS << "\n";
+    if (Desc->isPrimitive() && !Desc->isDummy()) {
+      OS << "   ";
+      {
+        ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_CYAN, false});
+        OS << primTypeToString(Desc->getPrimType()) << " ";
+      }
+      TYPE_SWITCH(Desc->getPrimType(), { GP.deref<T>().print(OS); });
+      OS << "\n";
+    }
     ++GI;
   }
 
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 4f3cd6c..0ce64a5 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -254,10 +254,10 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) {
     if (VD->isConstexpr())
       return true;
 
+    QualType T = VD->getType();
     if (S.getLangOpts().CPlusPlus && !S.getLangOpts().CPlusPlus11)
-      return false;
+      return T->isSignedIntegerOrEnumerationType() || T->isUnsignedIntegerOrEnumerationType();
 
-    QualType T = VD->getType();
     if (T.isConstQualified())
       return true;
 
@@ -485,7 +485,9 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
         // Don't emit anything if the function isn't defined and we're checking
         // for a constant expression. It might be defined at the point we're
         // actually calling it.
-        if (!DiagDecl->isDefined() && S.checkingPotentialConstantExpression())
+        bool IsExtern = DiagDecl->getStorageClass() == SC_Extern;
+        if (!DiagDecl->isDefined() && !IsExtern &&
+            S.checkingPotentialConstantExpression())
           return false;
 
         // If the declaration is defined _and_ declared 'constexpr', the below
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index db80e2d..405993e 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -122,6 +122,9 @@ bool CheckNonNullArgs(InterpState &S, CodePtr OpPC, const Function *F,
 bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
                                 const Pointer &Ptr, const APSInt &IntValue);
 
+/// Copy the contents of Src into Dest.
+bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest);
+
 /// Checks if the shift operation is legal.
 template <typename LT, typename RT>
 bool CheckShift(InterpState &S, CodePtr OpPC, const LT &LHS, const RT &RHS,
@@ -165,7 +168,8 @@ bool CheckDivRem(InterpState &S, CodePtr OpPC, const T &LHS, const T &RHS) {
     const auto *Op = cast<BinaryOperator>(S.Current->getExpr(OpPC));
     S.FFDiag(Op, diag::note_expr_divide_by_zero)
         << Op->getRHS()->getSourceRange();
-    return false;
+    if constexpr (!std::is_same_v<T, Floating>)
+      return false;
   }
 
   if (LHS.isSigned() && LHS.isMin() && RHS.isNegative() && RHS.isMinusOne()) {
@@ -1487,6 +1491,16 @@ bool InitElemPop(InterpState &S, CodePtr OpPC, uint32_t Idx) {
   return true;
 }
 
+inline bool Memcpy(InterpState &S, CodePtr OpPC) {
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  Pointer &Dest = S.Stk.peek<Pointer>();
+
+  if (!CheckLoad(S, OpPC, Src))
+    return false;
+
+  return DoMemcpy(S, OpPC, Src, Dest);
+}
+
 //===----------------------------------------------------------------------===//
 // AddOffset, SubOffset
 //===----------------------------------------------------------------------===//
@@ -1933,8 +1947,15 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
   const Pointer &Ptr = S.Stk.peek<Pointer>();
 
-  if (Ptr.isDummy())
-    return true;
+  if (!Ptr.isZero()) {
+    if (!CheckArray(S, OpPC, Ptr))
+      return false;
+
+    if (Ptr.isDummy()) {
+      S.Stk.push<Pointer>(Ptr);
+      return true;
+    }
+  }
 
   if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
     return false;
@@ -1947,9 +1968,14 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
   const Pointer &Ptr = S.Stk.pop<Pointer>();
 
-  if (Ptr.isDummy()) {
-    S.Stk.push<Pointer>(Ptr);
-    return true;
+  if (!Ptr.isZero()) {
+    if (!CheckArray(S, OpPC, Ptr))
+      return false;
+
+    if (Ptr.isDummy()) {
+      S.Stk.push<Pointer>(Ptr);
+      return true;
+    }
   }
 
   if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
@@ -2201,6 +2227,9 @@ inline bool Invalid(InterpState &S, CodePtr OpPC) {
   return false;
 }
 
+/// Do nothing and just abort execution.
+inline bool Error(InterpState &S, CodePtr OpPC) { return false; }
+
 /// Same here, but only for casts.
 inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind) {
   const SourceLocation &Loc = S.Current->getLocation(OpPC);
diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp
index c500b9d..b5bd4e9 100644
--- a/clang/lib/AST/Interp/InterpBuiltin.cpp
+++ b/clang/lib/AST/Interp/InterpBuiltin.cpp
@@ -533,11 +533,12 @@ static bool interp__builtin_rotate(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const Function *Func, const CallExpr *Call,
                                    bool Right) {
-  PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
-  assert(ArgT == *S.getContext().classify(Call->getArg(1)->getType()));
+  PrimType AmountT = *S.getContext().classify(Call->getArg(1)->getType());
+  PrimType ValueT = *S.getContext().classify(Call->getArg(0)->getType());
 
-  APSInt Amount = peekToAPSInt(S.Stk, ArgT);
-  APSInt Value = peekToAPSInt(S.Stk, ArgT, align(primSize(ArgT)) * 2);
+  APSInt Amount = peekToAPSInt(S.Stk, AmountT);
+  APSInt Value = peekToAPSInt(
+      S.Stk, ValueT, align(primSize(AmountT)) + align(primSize(ValueT)));
 
   APSInt Result;
   if (Right)
@@ -605,10 +606,9 @@ static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_launder(InterpState &S, CodePtr OpPC,
-                                    const InterpFrame *Frame,
-                                    const Function *Func,
-                                    const CallExpr *Call) {
+/// Just takes the first Argument to the call and puts it on the stack.
+static bool noopPointer(InterpState &S, CodePtr OpPC, const InterpFrame *Frame,
+                        const Function *Func, const CallExpr *Call) {
   const Pointer &Arg = S.Stk.peek<Pointer>();
   S.Stk.push<Pointer>(Arg);
   return true;
@@ -1143,7 +1143,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
     break;
 
   case Builtin::BI__builtin_launder:
-    if (!interp__builtin_launder(S, OpPC, Frame, F, Call))
+  case Builtin::BI__builtin___CFStringMakeConstantString:
+  case Builtin::BI__builtin___NSStringMakeConstantString:
+    if (!noopPointer(S, OpPC, Frame, F, Call))
       return false;
     break;
 
@@ -1326,5 +1328,50 @@ bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest) {
+  assert(Src.isLive() && Dest.isLive());
+
+  [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
+  const Descriptor *DestDesc = Dest.getFieldDesc();
+
+  assert(!DestDesc->isPrimitive() && !SrcDesc->isPrimitive());
+
+  if (DestDesc->isPrimitiveArray()) {
+    assert(SrcDesc->isPrimitiveArray());
+    assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
+    PrimType ET = DestDesc->getPrimType();
+    for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
+      Pointer DestElem = Dest.atIndex(I);
+      TYPE_SWITCH(ET, {
+        DestElem.deref<T>() = Src.atIndex(I).deref<T>();
+        DestElem.initialize();
+      });
+    }
+    return true;
+  }
+
+  if (DestDesc->isRecord()) {
+    assert(SrcDesc->isRecord());
+    assert(SrcDesc->ElemRecord == DestDesc->ElemRecord);
+    const Record *R = DestDesc->ElemRecord;
+    for (const Record::Field &F : R->fields()) {
+      Pointer DestField = Dest.atField(F.Offset);
+      if (std::optional<PrimType> FT = S.Ctx.classify(F.Decl->getType())) {
+        TYPE_SWITCH(*FT, {
+          DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
+          DestField.initialize();
+        });
+      } else {
+        return Invalid(S, OpPC);
+      }
+    }
+    return true;
+  }
+
+  // FIXME: Composite types.
+
+  return Invalid(S, OpPC);
+}
+
 } // namespace interp
 } // namespace clang
diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td
index 9b99aa0..cc1310f 100644
--- a/clang/lib/AST/Interp/Opcodes.td
+++ b/clang/lib/AST/Interp/Opcodes.td
@@ -706,6 +706,7 @@ def Dup : Opcode {
 
 // [] -> []
 def Invalid : Opcode {}
+def Error : Opcode {}
 def InvalidCast : Opcode {
   let Args = [ArgCastKind];
 }
@@ -720,3 +721,5 @@ def CheckNonNullArg : Opcode {
   let Types = [PtrTypeClass];
   let HasGroup = 1;
 }
+
+def Memcpy : Opcode;
diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp
index 3f85635..af60ced 100644
--- a/clang/lib/AST/Interp/Pointer.cpp
+++ b/clang/lib/AST/Interp/Pointer.cpp
@@ -320,10 +320,10 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx) const {
     // Complex types.
     if (const auto *CT = Ty->getAs<ComplexType>()) {
       QualType ElemTy = CT->getElementType();
-      std::optional<PrimType> ElemT = Ctx.classify(ElemTy);
-      assert(ElemT);
 
       if (ElemTy->isIntegerType()) {
+        std::optional<PrimType> ElemT = Ctx.classify(ElemTy);
+        assert(ElemT);
         INT_TYPE_SWITCH(*ElemT, {
           auto V1 = Ptr.atIndex(0).deref<T>();
           auto V2 = Ptr.atIndex(1).deref<T>();
diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index 34ecdb9..fffb4ab 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -285,6 +285,11 @@ public:
   bool inPrimitiveArray() const { return getFieldDesc()->isPrimitiveArray(); }
   /// Checks if the structure is an array of unknown size.
   bool isUnknownSizeArray() const {
+    // If this points inside a dummy block, return true.
+    // FIXME: This might change in the future. If it does, we need
+    // to set the proper Ctor/Dtor functions for dummy Descriptors.
+    if (Base != 0 && Base != sizeof(InlineDescriptor) && isDummy())
+      return true;
     return getFieldDesc()->isUnknownSizeArray();
   }
   /// Checks if the pointer points to an array.
diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp
index 86e18ed..da6f72c 100644
--- a/clang/lib/AST/Interp/Program.cpp
+++ b/clang/lib/AST/Interp/Program.cpp
@@ -145,11 +145,20 @@ std::optional<unsigned> Program::getOrCreateGlobal(const ValueDecl *VD,
 
 std::optional<unsigned> Program::getOrCreateDummy(const ValueDecl *VD) {
   // Dedup blocks since they are immutable and pointers cannot be compared.
-  if (auto It = DummyParams.find(VD); It != DummyParams.end())
+  if (auto It = DummyVariables.find(VD); It != DummyVariables.end())
     return It->second;
 
   // Create dummy descriptor.
-  Descriptor *Desc = allocateDescriptor(VD, std::nullopt);
+  // We create desriptors of 'array of unknown size' if the type is an array
+  // type _and_ the size isn't known (it's not a ConstantArrayType). If the size
+  // is known however, we create a regular dummy pointer.
+  Descriptor *Desc;
+  if (const auto *AT = VD->getType()->getAsArrayTypeUnsafe();
+      AT && !isa<ConstantArrayType>(AT))
+    Desc = allocateDescriptor(VD, Descriptor::UnknownSize{});
+  else
+    Desc = allocateDescriptor(VD);
+
   // Allocate a block for storage.
   unsigned I = Globals.size();
 
@@ -158,7 +167,7 @@ std::optional<unsigned> Program::getOrCreateDummy(const ValueDecl *VD) {
   G->block()->invokeCtor();
 
   Globals.push_back(G);
-  DummyParams[VD] = I;
+  DummyVariables[VD] = I;
   return I;
 }
 
diff --git a/clang/lib/AST/Interp/Program.h b/clang/lib/AST/Interp/Program.h
index 50bdb57..36b5a1f 100644
--- a/clang/lib/AST/Interp/Program.h
+++ b/clang/lib/AST/Interp/Program.h
@@ -208,7 +208,7 @@ private:
   llvm::DenseMap<const RecordDecl *, Record *> Records;
 
   /// Dummy parameter to generate pointers from.
-  llvm::DenseMap<const ValueDecl *, unsigned> DummyParams;
+  llvm::DenseMap<const ValueDecl *, unsigned> DummyVariables;
 
   /// Creates a new descriptor.
   template <typename... Ts>
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 96b3ad9..5d90551 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -925,6 +925,10 @@ bool TargetInfo::validateInputConstraint(
   return true;
 }
 
+bool TargetInfo::validatePointerAuthKey(const llvm::APSInt &value) const {
+  return false;
+}
+
 void TargetInfo::CheckFixedPointBits() const {
   // Check that the number of fractional and integral bits (and maybe sign) can
   // fit into the bits given for a fixed point type.
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 5abb060..fa8b5a8 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -14,6 +14,7 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -1450,6 +1451,11 @@ int AArch64TargetInfo::getEHDataRegisterNumber(unsigned RegNo) const {
   return -1;
 }
 
+bool AArch64TargetInfo::validatePointerAuthKey(
+    const llvm::APSInt &value) const {
+  return 0 <= value && value <= 3;
+}
+
 bool AArch64TargetInfo::hasInt128Type() const { return true; }
 
 AArch64leTargetInfo::AArch64leTargetInfo(const llvm::Triple &Triple,
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index c1ba156..2dd6b21 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -195,6 +195,8 @@ public:
 
   int getEHDataRegisterNumber(unsigned RegNo) const override;
 
+  bool validatePointerAuthKey(const llvm::APSInt &value) const override;
+
   const char *getBFloat16Mangling() const override { return "u6__bf16"; };
   bool hasInt128Type() const override;
 
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index 26a54f6..b5ba11a 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -36,7 +36,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     return;
   }
 
-  Builder.defineMacro("__BPF_FEATURE_ARENA_CAST");
+  Builder.defineMacro("__BPF_FEATURE_ADDR_SPACE_CAST");
 
   if (CPU.empty() || CPU == "generic" || CPU == "v1") {
     Builder.defineMacro("__BPF_CPU_VERSION__", "1");
@@ -45,6 +45,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
 
   std::string CpuVerNumStr = CPU.substr(1);
   Builder.defineMacro("__BPF_CPU_VERSION__", CpuVerNumStr);
+  Builder.defineMacro("__BPF_FEATURE_MAY_GOTO");
 
   int CpuVerNum = std::stoi(CpuVerNumStr);
   if (CpuVerNum >= 2)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 93ab465..e708bf3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5208,6 +5208,73 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__iso_volatile_store64:
     return RValue::get(EmitISOVolatileStore(*this, E));
 
+  case Builtin::BI__builtin_ptrauth_auth:
+  case Builtin::BI__builtin_ptrauth_auth_and_resign:
+  case Builtin::BI__builtin_ptrauth_blend_discriminator:
+  case Builtin::BI__builtin_ptrauth_sign_generic_data:
+  case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
+  case Builtin::BI__builtin_ptrauth_strip: {
+    // Emit the arguments.
+    SmallVector<llvm::Value *, 5> Args;
+    for (auto argExpr : E->arguments())
+      Args.push_back(EmitScalarExpr(argExpr));
+
+    // Cast the value to intptr_t, saving its original type.
+    llvm::Type *OrigValueType = Args[0]->getType();
+    if (OrigValueType->isPointerTy())
+      Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
+
+    switch (BuiltinID) {
+    case Builtin::BI__builtin_ptrauth_auth_and_resign:
+      if (Args[4]->getType()->isPointerTy())
+        Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
+      LLVM_FALLTHROUGH;
+
+    case Builtin::BI__builtin_ptrauth_auth:
+    case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
+      if (Args[2]->getType()->isPointerTy())
+        Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
+      break;
+
+    case Builtin::BI__builtin_ptrauth_sign_generic_data:
+      if (Args[1]->getType()->isPointerTy())
+        Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
+      break;
+
+    case Builtin::BI__builtin_ptrauth_blend_discriminator:
+    case Builtin::BI__builtin_ptrauth_strip:
+      break;
+    }
+
+    // Call the intrinsic.
+    auto IntrinsicID = [&]() -> unsigned {
+      switch (BuiltinID) {
+      case Builtin::BI__builtin_ptrauth_auth:
+        return llvm::Intrinsic::ptrauth_auth;
+      case Builtin::BI__builtin_ptrauth_auth_and_resign:
+        return llvm::Intrinsic::ptrauth_resign;
+      case Builtin::BI__builtin_ptrauth_blend_discriminator:
+        return llvm::Intrinsic::ptrauth_blend;
+      case Builtin::BI__builtin_ptrauth_sign_generic_data:
+        return llvm::Intrinsic::ptrauth_sign_generic;
+      case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
+        return llvm::Intrinsic::ptrauth_sign;
+      case Builtin::BI__builtin_ptrauth_strip:
+        return llvm::Intrinsic::ptrauth_strip;
+      }
+      llvm_unreachable("bad ptrauth intrinsic");
+    }();
+    auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
+    llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
+
+    if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
+        BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
+        OrigValueType->isPointerTy()) {
+      Result = Builder.CreateIntToPtr(Result, OrigValueType);
+    }
+    return RValue::get(Result);
+  }
+
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:
     return RValue::get(EmitSEHExceptionCode());
@@ -10686,6 +10753,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
+    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+    return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
     // Create call to __arm_sme_state and store the results to the two pointers.
     CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
@@ -18015,38 +18088,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     Value *X = EmitScalarExpr(E->getArg(0));
     Value *Y = EmitScalarExpr(E->getArg(1));
     Value *S = EmitScalarExpr(E->getArg(2));
-    llvm::Type *Xty = X->getType();
-    llvm::Type *Yty = Y->getType();
-    llvm::Type *Sty = S->getType();
-    if (!Xty->isVectorTy() && !Yty->isVectorTy() && !Sty->isVectorTy()) {
-      if (Xty->isFloatingPointTy()) {
-        auto V = Builder.CreateFSub(Y, X);
-        V = Builder.CreateFMul(S, V);
-        return Builder.CreateFAdd(X, V, "dx.lerp");
-      }
-      llvm_unreachable("Scalar Lerp is only supported on floats.");
-    }
-    // A VectorSplat should have happened
-    assert(Xty->isVectorTy() && Yty->isVectorTy() && Sty->isVectorTy() &&
-           "Lerp of vector and scalar is not supported.");
-
-    [[maybe_unused]] auto *XVecTy =
-        E->getArg(0)->getType()->getAs<VectorType>();
-    [[maybe_unused]] auto *YVecTy =
-        E->getArg(1)->getType()->getAs<VectorType>();
-    [[maybe_unused]] auto *SVecTy =
-        E->getArg(2)->getType()->getAs<VectorType>();
-    // A HLSLVectorTruncation should have happend
-    assert(XVecTy->getNumElements() == YVecTy->getNumElements() &&
-           XVecTy->getNumElements() == SVecTy->getNumElements() &&
-           "Lerp requires vectors to be of the same size.");
-    assert(XVecTy->getElementType()->isRealFloatingType() &&
-           XVecTy->getElementType() == YVecTy->getElementType() &&
-           XVecTy->getElementType() == SVecTy->getElementType() &&
-           "Lerp requires float vectors to be of the same type.");
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("lerp operand must have a float representation");
     return Builder.CreateIntrinsic(
-        /*ReturnType=*/Xty, Intrinsic::dx_lerp, ArrayRef<Value *>{X, Y, S},
-        nullptr, "dx.lerp");
+        /*ReturnType=*/X->getType(), Intrinsic::dx_lerp,
+        ArrayRef<Value *>{X, Y, S}, nullptr, "dx.lerp");
   }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
@@ -18056,6 +18102,20 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
         ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_isinf: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    llvm::Type *Xty = Op0->getType();
+    llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
+    if (Xty->isVectorTy()) {
+      auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
+      retType = llvm::VectorType::get(
+          retType, ElementCount::getFixed(XVecTy->getNumElements()));
+    }
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("isinf operand must have a float representation");
+    return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
+                                   ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
+  }
   case Builtin::BI__builtin_hlsl_mad: {
     Value *M = EmitScalarExpr(E->getArg(0));
     Value *A = EmitScalarExpr(E->getArg(1));
@@ -18083,6 +18143,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
         ArrayRef<Value *>{Op0}, nullptr, "dx.rcp");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("rsqrt operand must have a float representation");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
+        ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
+  }
   }
   return nullptr;
 }
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 08b1fd0..03450fc 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -453,12 +453,6 @@ ToolChain::getDefaultUnwindTableLevel(const ArgList &Args) const {
   return UnwindTableLevel::None;
 }
 
-unsigned ToolChain::GetDefaultDwarfVersion() const {
-  // TODO: Remove the RISC-V special case when R_RISCV_SET_ULEB128 linker
-  // support becomes more widely available.
-  return getTriple().isRISCV() ? 4 : 5;
-}
-
 Tool *ToolChain::getClang() const {
   if (!Clang)
     Clang.reset(new tools::Clang(*this, useIntegratedBackend()));
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index aa3b80c..3e6e295 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -321,9 +321,11 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
     }
   }
 
-  if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
-                               options::OPT_munaligned_access)) {
-    if (A->getOption().matches(options::OPT_mno_unaligned_access))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_mstrict_align, options::OPT_mno_strict_align,
+          options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) {
+    if (A->getOption().matches(options::OPT_mstrict_align) ||
+        A->getOption().matches(options::OPT_mno_unaligned_access))
       Features.push_back("+strict-align");
   } else if (Triple.isOSOpenBSD())
     Features.push_back("+strict-align");
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index ba158b9..a68368c 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -868,12 +868,16 @@ fp16_fml_fallthrough:
     }
   }
 
-  // Kernel code has more strict alignment requirements.
-  if (KernelOrKext) {
-    Features.push_back("+strict-align");
-  } else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
-                                      options::OPT_munaligned_access)) {
-    if (A->getOption().matches(options::OPT_munaligned_access)) {
+  if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
+                                      options::OPT_munaligned_access,
+                                      options::OPT_mstrict_align,
+                                      options::OPT_mno_strict_align)) {
+    // Kernel code has more strict alignment requirements.
+    if (KernelOrKext ||
+        A->getOption().matches(options::OPT_mno_unaligned_access) ||
+        A->getOption().matches(options::OPT_mstrict_align)) {
+      Features.push_back("+strict-align");
+    } else {
       // No v6M core supports unaligned memory access (v6M ARM ARM A3.2).
       if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m)
         D.Diag(diag::err_target_unsupported_unaligned) << "v6m";
@@ -881,8 +885,7 @@ fp16_fml_fallthrough:
       // access either.
       else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline)
         D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base";
-    } else
-      Features.push_back("+strict-align");
+    }
   } else {
     // Assume pre-ARMv6 doesn't support unaligned accesses.
     //
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
index 31153a6..d23f9b3 100644
--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -165,10 +165,9 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
     }
   }
 
-  // Select the `ual` feature determined by -m[no-]unaligned-access
-  // or the alias -m[no-]strict-align.
-  AddTargetFeature(Args, Features, options::OPT_munaligned_access,
-                   options::OPT_mno_unaligned_access, "ual");
+  // Select the `ual` feature determined by -m[no-]strict-align.
+  AddTargetFeature(Args, Features, options::OPT_mno_strict_align,
+                   options::OPT_mstrict_align, "ual");
 
   // Accept but warn about these TargetSpecific options.
   if (Arg *A = Args.getLastArgNoClaim(options::OPT_mabi_EQ))
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index a46b44f..5165bcc 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -167,9 +167,9 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple,
     Features.push_back("-relax");
   }
 
-  // -mno-unaligned-access is default, unless -munaligned-access is specified.
-  AddTargetFeature(Args, Features, options::OPT_munaligned_access,
-                   options::OPT_mno_unaligned_access, "fast-unaligned-access");
+  // -mstrict-align is default, unless -mno-strict-align is specified.
+  AddTargetFeature(Args, Features, options::OPT_mno_strict_align,
+                   options::OPT_mstrict_align, "fast-unaligned-access");
 
   // Now add any that the user explicitly requested on the command line,
   // which may override the defaults.
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3a7a1cf..055884d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7199,6 +7199,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // -fno-common is the default, set -fcommon only when that flag is set.
   Args.addOptInFlag(CmdArgs, options::OPT_fcommon, options::OPT_fno_common);
 
+  if (Args.hasFlag(options::OPT_fptrauth_intrinsics,
+                   options::OPT_fno_ptrauth_intrinsics, false))
+    CmdArgs.push_back("-fptrauth-intrinsics");
+
   // -fsigned-bitfields is default, and clang doesn't yet support
   // -funsigned-bitfields.
   if (!Args.hasFlag(options::OPT_fsigned_bitfields,
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 100e712..83015b0 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -740,7 +740,8 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
     SupportedArgument = V == "desc" || V == "trad";
     EnableTLSDESC = V == "desc";
   } else if (Triple.isX86()) {
-    SupportedArgument = V == "gnu";
+    SupportedArgument = V == "gnu" || V == "gnu2";
+    EnableTLSDESC = V == "gnu2";
   } else {
     Unsupported = true;
   }
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index df44e69..964f1ea 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -241,7 +241,9 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
     : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
       Whitespaces(Whitespaces), Encoding(Encoding),
       BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
-      CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
+      CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {
+  assert(IsCpp == Style.isCpp());
+}
 
 LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
                                                 unsigned FirstStartColumn,
@@ -406,7 +408,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
   }
   if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
        (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
-        State.Line->First->isNot(TT_AttributeSquare) && Style.isCpp() &&
+        State.Line->First->isNot(TT_AttributeSquare) && IsCpp &&
         // FIXME: This is a temporary workaround for the case where clang-format
         // sets BreakBeforeParameter to avoid bin packing and this creates a
         // completely unnecessary line break after a template type that isn't
@@ -677,8 +679,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
   auto &CurrentState = State.Stack.back();
 
   bool DisallowLineBreaksOnThisLine =
-      Style.LambdaBodyIndentation == FormatStyle::LBI_Signature &&
-      Style.isCpp() && [&Current] {
+      Style.LambdaBodyIndentation == FormatStyle::LBI_Signature && IsCpp &&
+      [&Current] {
         // Deal with lambda arguments in C++. The aim here is to ensure that we
         // don't over-indent lambda function bodies when lambdas are passed as
         // arguments to function calls. We do this by ensuring that either all
@@ -1091,7 +1093,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
   // Any break on this level means that the parent level has been broken
   // and we need to avoid bin packing there.
   bool NestedBlockSpecialCase =
-      (!Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 &&
+      (!IsCpp && Current.is(tok::r_brace) && State.Stack.size() > 1 &&
        State.Stack[State.Stack.size() - 2].NestedBlockInlined) ||
       (Style.Language == FormatStyle::LK_ObjC && Current.is(tok::r_brace) &&
        State.Stack.size() > 1 && !Style.ObjCBreakBeforeNestedBlockParam);
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index e64ba7e..faf65c6 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -3841,13 +3841,15 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
 }
 
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
-  LangOptions LangOpts;
+  IsCpp = Style.isCpp();
 
   FormatStyle::LanguageStandard LexingStd = Style.Standard;
   if (LexingStd == FormatStyle::LS_Auto)
     LexingStd = FormatStyle::LS_Latest;
   if (LexingStd == FormatStyle::LS_Latest)
     LexingStd = FormatStyle::LS_Cpp20;
+
+  LangOptions LangOpts;
   LangOpts.CPlusPlus = 1;
   LangOpts.CPlusPlus11 = LexingStd >= FormatStyle::LS_Cpp11;
   LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14;
@@ -3858,10 +3860,8 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
   // the sequence "<::" will be unconditionally treated as "[:".
   // Cf. Lexer::LexTokenInternal.
   LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11;
-
   LangOpts.LineComment = 1;
-  bool AlternativeOperators = Style.isCpp();
-  LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
+  LangOpts.CXXOperatorNames = IsCpp;
   LangOpts.Bool = 1;
   LangOpts.ObjC = 1;
   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
@@ -3943,6 +3943,8 @@ const char *DefaultFormatStyle = "file";
 
 const char *DefaultFallbackStyle = "LLVM";
 
+bool IsCpp = false;
+
 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
 loadAndParseConfigFile(StringRef ConfigFile, llvm::vfs::FileSystem *FS,
                        FormatStyle *Style, bool AllowUnknownOptions) {
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index 4fb70ff..665b2e4 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -78,15 +78,15 @@ static SmallVector<StringRef> CppNonKeywordTypes = {
     "uint32_t", "uint64_t",  "uint8_t", "uintptr_t",
 };
 
-bool FormatToken::isTypeName(bool IsCpp) const {
+bool FormatToken::isTypeName() const {
   return is(TT_TypeName) || isSimpleTypeSpecifier() ||
          (IsCpp && is(tok::identifier) &&
           std::binary_search(CppNonKeywordTypes.begin(),
                              CppNonKeywordTypes.end(), TokenText));
 }
 
-bool FormatToken::isTypeOrIdentifier(bool IsCpp) const {
-  return isTypeName(IsCpp) || isOneOf(tok::kw_auto, tok::identifier);
+bool FormatToken::isTypeOrIdentifier() const {
+  return isTypeName() || isOneOf(tok::kw_auto, tok::identifier);
 }
 
 bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index f4566e4..ee96d07 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -676,9 +676,9 @@ public:
   /// Determine whether the token is a simple-type-specifier.
   [[nodiscard]] bool isSimpleTypeSpecifier() const;
 
-  [[nodiscard]] bool isTypeName(bool IsCpp) const;
+  [[nodiscard]] bool isTypeName() const;
 
-  [[nodiscard]] bool isTypeOrIdentifier(bool IsCpp) const;
+  [[nodiscard]] bool isTypeOrIdentifier() const;
 
   bool isObjCAccessSpecifier() const {
     return is(tok::at) && Next &&
@@ -823,7 +823,7 @@ public:
 
   /// Returns whether the token is the left square bracket of a C++
   /// structured binding declaration.
-  bool isCppStructuredBinding(bool IsCpp) const {
+  bool isCppStructuredBinding() const {
     if (!IsCpp || isNot(tok::l_square))
       return false;
     const FormatToken *T = this;
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 036f7e6..717eb6b 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -34,6 +34,7 @@ FormatTokenLexer::FormatTokenLexer(
       Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
       FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
       MacroBlockEndRegex(Style.MacroBlockEnd) {
+  assert(IsCpp == Style.isCpp());
   Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
   Lex->SetKeepWhitespaceMode(true);
 
@@ -114,7 +115,7 @@ void FormatTokenLexer::tryMergePreviousTokens() {
     return;
   if (tryMergeForEach())
     return;
-  if (Style.isCpp() && tryTransformTryUsageForC())
+  if (IsCpp && tryTransformTryUsageForC())
     return;
 
   if (Style.isJavaScript() || Style.isCSharp()) {
@@ -1341,7 +1342,7 @@ FormatToken *FormatTokenLexer::getNextToken() {
     Column = FormatTok->LastLineColumnWidth;
   }
 
-  if (Style.isCpp()) {
+  if (IsCpp) {
     auto *Identifier = FormatTok->Tok.getIdentifierInfo();
     auto it = Macros.find(Identifier);
     if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
index c263530..a44c090 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.cpp
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -268,13 +268,11 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight(
   if (isPossibleMacro(TypeToken))
     return Tok;
 
-  const bool IsCpp = Style.isCpp();
-
   // The case `const long long int volatile` -> `long long int const volatile`
   // The case `long const long int volatile` -> `long long int const volatile`
   // The case `long long volatile int const` -> `long long int const volatile`
   // The case `const long long volatile int` -> `long long int const volatile`
-  if (TypeToken->isTypeName(IsCpp)) {
+  if (TypeToken->isTypeName()) {
     // The case `const decltype(foo)` -> `const decltype(foo)`
     // The case `const typeof(foo)` -> `const typeof(foo)`
     // The case `const _Atomic(foo)` -> `const _Atomic(foo)`
@@ -282,10 +280,8 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight(
       return Tok;
 
     const FormatToken *LastSimpleTypeSpecifier = TypeToken;
-    while (isQualifierOrType(LastSimpleTypeSpecifier->getNextNonComment(),
-                             IsCpp)) {
+    while (isQualifierOrType(LastSimpleTypeSpecifier->getNextNonComment()))
       LastSimpleTypeSpecifier = LastSimpleTypeSpecifier->getNextNonComment();
-    }
 
     rotateTokens(SourceMgr, Fixes, Tok, LastSimpleTypeSpecifier,
                  /*Left=*/false);
@@ -295,7 +291,7 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight(
   // The case  `unsigned short const` -> `unsigned short const`
   // The case:
   // `unsigned short volatile const` -> `unsigned short const volatile`
-  if (PreviousCheck && PreviousCheck->isTypeName(IsCpp)) {
+  if (PreviousCheck && PreviousCheck->isTypeName()) {
     if (LastQual != Tok)
       rotateTokens(SourceMgr, Fixes, Tok, LastQual, /*Left=*/false);
     return Tok;
@@ -412,11 +408,11 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft(
   // The case `volatile long long const int` -> `const volatile long long int`
   // The case `const long long volatile int` -> `const volatile long long int`
   // The case `long volatile long int const` -> `const volatile long long int`
-  if (const bool IsCpp = Style.isCpp(); TypeToken->isTypeName(IsCpp)) {
+  if (TypeToken->isTypeName()) {
     const FormatToken *LastSimpleTypeSpecifier = TypeToken;
     while (isConfiguredQualifierOrType(
         LastSimpleTypeSpecifier->getPreviousNonComment(),
-        ConfiguredQualifierTokens, IsCpp)) {
+        ConfiguredQualifierTokens)) {
       LastSimpleTypeSpecifier =
           LastSimpleTypeSpecifier->getPreviousNonComment();
     }
@@ -531,7 +527,9 @@ LeftRightQualifierAlignmentFixer::LeftRightQualifierAlignmentFixer(
     const std::string &Qualifier,
     const std::vector<tok::TokenKind> &QualifierTokens, bool RightAlign)
     : TokenAnalyzer(Env, Style), Qualifier(Qualifier), RightAlign(RightAlign),
-      ConfiguredQualifierTokens(QualifierTokens) {}
+      ConfiguredQualifierTokens(QualifierTokens) {
+  IsCpp = Style.isCpp();
+}
 
 std::pair<tooling::Replacements, unsigned>
 LeftRightQualifierAlignmentFixer::analyze(
@@ -614,16 +612,15 @@ void prepareLeftRightOrderingForQualifierAlignmentFixer(
   }
 }
 
-bool LeftRightQualifierAlignmentFixer::isQualifierOrType(const FormatToken *Tok,
-                                                         bool IsCpp) {
+bool LeftRightQualifierAlignmentFixer::isQualifierOrType(
+    const FormatToken *Tok) {
   return Tok &&
-         (Tok->isTypeName(IsCpp) || Tok->is(tok::kw_auto) || isQualifier(Tok));
+         (Tok->isTypeName() || Tok->is(tok::kw_auto) || isQualifier(Tok));
 }
 
 bool LeftRightQualifierAlignmentFixer::isConfiguredQualifierOrType(
-    const FormatToken *Tok, const std::vector<tok::TokenKind> &Qualifiers,
-    bool IsCpp) {
-  return Tok && (Tok->isTypeName(IsCpp) || Tok->is(tok::kw_auto) ||
+    const FormatToken *Tok, const std::vector<tok::TokenKind> &Qualifiers) {
+  return Tok && (Tok->isTypeName() || Tok->is(tok::kw_auto) ||
                  isConfiguredQualifier(Tok, Qualifiers));
 }
 
diff --git a/clang/lib/Format/QualifierAlignmentFixer.h b/clang/lib/Format/QualifierAlignmentFixer.h
index e1cc27e..e922d80 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.h
+++ b/clang/lib/Format/QualifierAlignmentFixer.h
@@ -71,11 +71,10 @@ public:
                                  tok::TokenKind QualifierType);
 
   // Is the Token a simple or qualifier type
-  static bool isQualifierOrType(const FormatToken *Tok, bool IsCpp = true);
+  static bool isQualifierOrType(const FormatToken *Tok);
   static bool
   isConfiguredQualifierOrType(const FormatToken *Tok,
-                              const std::vector<tok::TokenKind> &Qualifiers,
-                              bool IsCpp = true);
+                              const std::vector<tok::TokenKind> &Qualifiers);
 
   // Is the Token likely a Macro
   static bool isPossibleMacro(const FormatToken *Tok);
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index d7b84e3..e464c2b 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -84,7 +84,7 @@ static bool isKeywordWithCondition(const FormatToken &Tok) {
 }
 
 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
-static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
+static bool isCppAttribute(const FormatToken &Tok) {
   if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
     return false;
   // The first square bracket is part of an ObjC array literal
@@ -126,7 +126,8 @@ public:
                    const AdditionalKeywords &Keywords,
                    SmallVector<ScopeType> &Scopes)
       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
-        IsCpp(Style.isCpp()), Keywords(Keywords), Scopes(Scopes) {
+        Keywords(Keywords), Scopes(Scopes) {
+    assert(IsCpp == Style.isCpp());
     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
     resetTokenMetadata();
   }
@@ -562,7 +563,7 @@ private:
           (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
            CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
       if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
-           CurrentToken->Previous->isTypeName(IsCpp)) &&
+           CurrentToken->Previous->isTypeName()) &&
           !(CurrentToken->is(tok::l_brace) ||
             (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
         Contexts.back().IsExpression = false;
@@ -682,7 +683,7 @@ private:
 
     const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
     const bool IsCpp11AttributeSpecifier =
-        isCppAttribute(IsCpp, *Left) || IsInnerSquare;
+        isCppAttribute(*Left) || IsInnerSquare;
 
     // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
     bool IsCSharpAttributeSpecifier =
@@ -690,7 +691,7 @@ private:
         Contexts.back().InCSharpAttributeSpecifier;
 
     bool InsideInlineASM = Line.startsWith(tok::kw_asm);
-    bool IsCppStructuredBinding = Left->isCppStructuredBinding(IsCpp);
+    bool IsCppStructuredBinding = Left->isCppStructuredBinding();
     bool StartsObjCMethodExpr =
         !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
         IsCpp && !IsCpp11AttributeSpecifier && !IsCSharpAttributeSpecifier &&
@@ -2573,7 +2574,7 @@ private:
       return true;
 
     // MyClass a;
-    if (PreviousNotConst->isTypeName(IsCpp))
+    if (PreviousNotConst->isTypeName())
       return true;
 
     // type[] a in Java
@@ -2688,7 +2689,7 @@ private:
     if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
                           tok::kw_requires, tok::kw_throw, tok::arrow,
                           Keywords.kw_override, Keywords.kw_final) ||
-        isCppAttribute(IsCpp, *Tok.Next)) {
+        isCppAttribute(*Tok.Next)) {
       return false;
     }
 
@@ -2704,10 +2705,9 @@ private:
     }
 
     // Heuristically try to determine whether the parentheses contain a type.
-    auto IsQualifiedPointerOrReference = [this](FormatToken *T) {
+    auto IsQualifiedPointerOrReference = [](FormatToken *T) {
       // This is used to handle cases such as x = (foo *const)&y;
-      assert(!T->isTypeName(IsCpp) && "Should have already been checked");
-      (void)IsCpp; // Avoid -Wunused-lambda-capture when assertion is disabled.
+      assert(!T->isTypeName() && "Should have already been checked");
       // Strip trailing qualifiers such as const or volatile when checking
       // whether the parens could be a cast to a pointer/reference type.
       while (T) {
@@ -2739,7 +2739,7 @@ private:
     bool ParensAreType =
         !Tok.Previous ||
         Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
-        Tok.Previous->isTypeName(IsCpp) ||
+        Tok.Previous->isTypeName() ||
         IsQualifiedPointerOrReference(Tok.Previous);
     bool ParensCouldEndDecl =
         Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
@@ -3010,7 +3010,6 @@ private:
   AnnotatedLine &Line;
   FormatToken *CurrentToken;
   bool AutoFound;
-  bool IsCpp;
   const AdditionalKeywords &Keywords;
 
   SmallVector<ScopeType> &Scopes;
@@ -3585,7 +3584,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
 
 // This function heuristically determines whether 'Current' starts the name of a
 // function declaration.
-static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
+static bool isFunctionDeclarationName(const FormatToken &Current,
                                       const AnnotatedLine &Line,
                                       FormatToken *&ClosingParen) {
   assert(Current.Previous);
@@ -3596,8 +3595,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
   if (!Current.Tok.getIdentifierInfo())
     return false;
 
-  auto skipOperatorName =
-      [IsCpp](const FormatToken *Next) -> const FormatToken * {
+  auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
     for (; Next; Next = Next->Next) {
       if (Next->is(TT_OverloadedOperatorLParen))
         return Next;
@@ -3616,8 +3614,8 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
         Next = Next->Next;
         continue;
       }
-      if ((Next->isTypeName(IsCpp) || Next->is(tok::identifier)) &&
-          Next->Next && Next->Next->isPointerOrReference()) {
+      if ((Next->isTypeName() || Next->is(tok::identifier)) && Next->Next &&
+          Next->Next->isPointerOrReference()) {
         // For operator void*(), operator char*(), operator Foo*().
         Next = Next->Next;
         continue;
@@ -3665,7 +3663,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
         }
         if (Next->isNot(tok::identifier))
           return false;
-      } else if (isCppAttribute(IsCpp, *Next)) {
+      } else if (isCppAttribute(*Next)) {
         Next = Next->MatchingParen;
         if (!Next)
           return false;
@@ -3714,7 +3712,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
       Tok = Tok->MatchingParen;
       continue;
     }
-    if (Tok->is(tok::kw_const) || Tok->isTypeName(IsCpp) ||
+    if (Tok->is(tok::kw_const) || Tok->isTypeName() ||
         Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) {
       return true;
     }
@@ -3776,8 +3774,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
     if (Tok->Previous->EndsCppAttributeGroup)
       AfterLastAttribute = Tok;
     if (const bool IsCtorOrDtor = Tok->is(TT_CtorDtorDeclName);
-        IsCtorOrDtor ||
-        isFunctionDeclarationName(IsCpp, *Tok, Line, ClosingParen)) {
+        IsCtorOrDtor || isFunctionDeclarationName(*Tok, Line, ClosingParen)) {
       if (!IsCtorOrDtor)
         Tok->setFinalizedType(TT_FunctionDeclarationName);
       LineIsFunctionDeclaration = true;
@@ -4377,7 +4374,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
     if (Left.Tok.isLiteral())
       return true;
     // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
-    if (Left.isTypeOrIdentifier(IsCpp) && Right.Next && Right.Next->Next &&
+    if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next &&
         Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
       return getTokenPointerOrReferenceAlignment(Right) !=
              FormatStyle::PAS_Left;
@@ -4420,8 +4417,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
     if (Right.is(tok::l_brace) && Right.is(BK_Block))
       return true;
     // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
-    if (Left.Previous && Left.Previous->isTypeOrIdentifier(IsCpp) &&
-        Right.Next && Right.Next->is(TT_RangeBasedForLoopColon)) {
+    if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next &&
+        Right.Next->is(TT_RangeBasedForLoopColon)) {
       return getTokenPointerOrReferenceAlignment(Left) !=
              FormatStyle::PAS_Right;
     }
@@ -4459,7 +4456,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
   if (Right.isPointerOrReference()) {
     const FormatToken *Previous = &Left;
     while (Previous && Previous->isNot(tok::kw_operator)) {
-      if (Previous->is(tok::identifier) || Previous->isTypeName(IsCpp)) {
+      if (Previous->is(tok::identifier) || Previous->isTypeName()) {
         Previous = Previous->getPreviousNonComment();
         continue;
       }
@@ -4648,7 +4645,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
   if (!Style.isVerilog() &&
       (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
                     tok::r_paren) ||
-       Left.isTypeName(IsCpp)) &&
+       Left.isTypeName()) &&
       Right.is(tok::l_brace) && Right.getNextNonComment() &&
       Right.isNot(BK_Block)) {
     return false;
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index a631e5f..b0931e8 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -212,7 +212,9 @@ private:
 class TokenAnnotator {
 public:
   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
-      : Style(Style), IsCpp(Style.isCpp()), Keywords(Keywords) {}
+      : Style(Style), Keywords(Keywords) {
+    assert(IsCpp == Style.isCpp());
+  }
 
   /// Adapts the indent levels of comment lines to the indent of the
   /// subsequent line.
@@ -260,8 +262,6 @@ private:
 
   const FormatStyle &Style;
 
-  bool IsCpp;
-
   const AdditionalKeywords &Keywords;
 
   SmallVector<ScopeType> Scopes;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index e3f0af1..a1f6ce0 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -159,14 +159,16 @@ UnwrappedLineParser::UnwrappedLineParser(
     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
     IdentifierTable &IdentTable)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
-      CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
-      Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas),
-      Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
+      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
+      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
+      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                        ? IG_Rejected
                        : IG_Inited),
       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
-      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
+      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
+  assert(IsCpp == Style.isCpp());
+}
 
 void UnwrappedLineParser::reset() {
   PPBranchLevel = -1;
@@ -1865,7 +1867,7 @@ void UnwrappedLineParser::parseStructuralElement(
     case tok::caret:
       nextToken();
       // Block return type.
-      if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(IsCpp)) {
+      if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName()) {
         nextToken();
         // Return types: pointers are ok too.
         while (FormatTok->is(tok::star))
@@ -2221,7 +2223,7 @@ bool UnwrappedLineParser::tryToParseLambda() {
   bool InTemplateParameterList = false;
 
   while (FormatTok->isNot(tok::l_brace)) {
-    if (FormatTok->isTypeName(IsCpp)) {
+    if (FormatTok->isTypeName()) {
       nextToken();
       continue;
     }
@@ -2338,7 +2340,7 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
                                         tok::kw_co_yield, tok::kw_co_return)) ||
                     Previous->closesScope())) ||
-      LeftSquare->isCppStructuredBinding(IsCpp)) {
+      LeftSquare->isCppStructuredBinding()) {
     return false;
   }
   if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
@@ -3414,7 +3416,7 @@ bool clang::format::UnwrappedLineParser::parseRequires() {
     break;
   }
   default:
-    if (PreviousNonComment->isTypeOrIdentifier(IsCpp)) {
+    if (PreviousNonComment->isTypeOrIdentifier()) {
       // This is a requires clause.
       parseRequiresClause(RequiresToken);
       return true;
@@ -3477,7 +3479,7 @@ bool clang::format::UnwrappedLineParser::parseRequires() {
       --OpenAngles;
       break;
     default:
-      if (NextToken->isTypeName(IsCpp)) {
+      if (NextToken->isTypeName()) {
         FormatTok = Tokens->setPosition(StoredPosition);
         parseRequiresExpression(RequiresToken);
         return false;
@@ -3962,7 +3964,7 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
       if (FormatTok->is(tok::l_square)) {
         FormatToken *Previous = FormatTok->Previous;
         if (!Previous || (Previous->isNot(tok::r_paren) &&
-                          !Previous->isTypeOrIdentifier(IsCpp))) {
+                          !Previous->isTypeOrIdentifier())) {
           // Don't try parsing a lambda if we had a closing parenthesis before,
           // it was probably a pointer to an array: int (*)[].
           if (!tryToParseLambda())
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 619fbb2..1403533 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -324,7 +324,6 @@ private:
   llvm::BitVector DeclarationScopeStack;
 
   const FormatStyle &Style;
-  bool IsCpp;
   const AdditionalKeywords &Keywords;
 
   llvm::Regex CommentPragmasRegex;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 451bdb9..2a21a9d 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3293,6 +3293,17 @@ static void ParseAPINotesArgs(APINotesOptions &Opts, ArgList &Args,
     Opts.ModuleSearchPaths.push_back(A->getValue());
 }
 
+static void GeneratePointerAuthArgs(const LangOptions &Opts,
+                                    ArgumentConsumer Consumer) {
+  if (Opts.PointerAuthIntrinsics)
+    GenerateArg(Consumer, OPT_fptrauth_intrinsics);
+}
+
+static void ParsePointerAuthArgs(LangOptions &Opts, ArgList &Args,
+                                 DiagnosticsEngine &Diags) {
+  Opts.PointerAuthIntrinsics = Args.hasArg(OPT_fptrauth_intrinsics);
+}
+
 /// Check if input file kind and language standard are compatible.
 static bool IsInputCompatibleWithStandard(InputKind IK,
                                           const LangStandard &S) {
@@ -4014,6 +4025,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
 
       if (TT.getArch() == llvm::Triple::UnknownArch ||
           !(TT.getArch() == llvm::Triple::aarch64 || TT.isPPC() ||
+            TT.getArch() == llvm::Triple::systemz ||
             TT.getArch() == llvm::Triple::nvptx ||
             TT.getArch() == llvm::Triple::nvptx64 ||
             TT.getArch() == llvm::Triple::amdgcn ||
@@ -4612,6 +4624,8 @@ bool CompilerInvocation::CreateFromArgsImpl(
                         Res.getFileSystemOpts().WorkingDir);
   ParseAPINotesArgs(Res.getAPINotesOpts(), Args, Diags);
 
+  ParsePointerAuthArgs(LangOpts, Args, Diags);
+
   ParseLangArgs(LangOpts, Args, DashX, T, Res.getPreprocessorOpts().Includes,
                 Diags);
   if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC)
@@ -4842,6 +4856,7 @@ void CompilerInvocationBase::generateCC1CommandLine(
   GenerateTargetArgs(getTargetOpts(), Consumer);
   GenerateHeaderSearchArgs(getHeaderSearchOpts(), Consumer);
   GenerateAPINotesArgs(getAPINotesOpts(), Consumer);
+  GeneratePointerAuthArgs(getLangOpts(), Consumer);
   GenerateLangArgs(getLangOpts(), Consumer, T, getFrontendOpts().DashX);
   GenerateCodeGenArgs(getCodeGenOpts(), Consumer, T,
                       getFrontendOpts().OutputFile, &getLangOpts());
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b9a966b..902e33b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -214,6 +214,7 @@ set(x86_files
   popcntintrin.h
   prfchiintrin.h
   prfchwintrin.h
+  ptrauth.h
   ptwriteintrin.h
   raointintrin.h
   rdpruintrin.h
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 45f8544..718fb9a 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -526,6 +526,39 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
 float4 frac(float4);
 
 //===----------------------------------------------------------------------===//
+// isinf builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T isinf(T x)
+/// \brief Determines if the specified value \a x  is infinite.
+/// \param x The specified input value.
+///
+/// Returns a value of the same size as the input, with a value set
+/// to True if the x parameter is +INF or -INF. Otherwise, False.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool isinf(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool2 isinf(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool3 isinf(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool4 isinf(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool isinf(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool2 isinf(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool3 isinf(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_isinf)
+bool4 isinf(float4);
+
+//===----------------------------------------------------------------------===//
 // lerp builtins
 //===----------------------------------------------------------------------===//
 
@@ -1154,6 +1187,39 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
 double4 rcp(double4);
 
 //===----------------------------------------------------------------------===//
+// rsqrt builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T rsqrt(T x)
+/// \brief Returns the reciprocal of the square root of the specified value.
+/// ie 1 / sqrt( \a x).
+/// \param x The specified input value.
+///
+/// This function uses the following formula: 1 / sqrt(x).
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half rsqrt(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half2 rsqrt(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half3 rsqrt(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half4 rsqrt(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float rsqrt(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float2 rsqrt(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float3 rsqrt(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float4 rsqrt(float4);
+
+//===----------------------------------------------------------------------===//
 // round builtins
 //===----------------------------------------------------------------------===//
 
diff --git a/clang/lib/Headers/module.modulemap b/clang/lib/Headers/module.modulemap
index 56a13f6..8741968f 100644
--- a/clang/lib/Headers/module.modulemap
+++ b/clang/lib/Headers/module.modulemap
@@ -315,3 +315,8 @@ module opencl_c {
   header "opencl-c.h"
   header "opencl-c-base.h"
 }
+
+module ptrauth {
+  header "ptrauth.h"
+  export *
+}
diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
new file mode 100644
index 0000000..56c3c36
--- /dev/null
+++ b/clang/lib/Headers/ptrauth.h
@@ -0,0 +1,185 @@
+/*===---- ptrauth.h - Pointer authentication -------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __PTRAUTH_H
+#define __PTRAUTH_H
+
+typedef enum {
+  ptrauth_key_asia = 0,
+  ptrauth_key_asib = 1,
+  ptrauth_key_asda = 2,
+  ptrauth_key_asdb = 3,
+} ptrauth_key;
+
+/* An integer type of the appropriate size for a discriminator argument. */
+typedef __UINTPTR_TYPE__ ptrauth_extra_data_t;
+
+/* An integer type of the appropriate size for a generic signature. */
+typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
+
+/* A signed pointer value embeds the original pointer together with
+   a signature that attests to the validity of that pointer.  Because
+   this signature must use only "spare" bits of the pointer, a
+   signature's validity is probabilistic in practice: it is unlikely
+   but still plausible that an invalidly-derived signature will
+   somehow equal the correct signature and therefore successfully
+   authenticate.  Nonetheless, this scheme provides a strong degree
+   of protection against certain kinds of attacks. */
+
+/* Authenticating a pointer that was not signed with the given key
+   and extra-data value will (likely) fail by trapping. */
+
+#if __has_feature(ptrauth_intrinsics)
+
+/* Strip the signature from a value without authenticating it.
+
+   If the value is a function pointer, the result will not be a
+   legal function pointer because of the missing signature, and
+   attempting to call it will result in an authentication failure.
+
+   The value must be an expression of pointer type.
+   The key must be a constant expression of type ptrauth_key.
+   The result will have the same type as the original value. */
+#define ptrauth_strip(__value, __key) __builtin_ptrauth_strip(__value, __key)
+
+/* Blend a constant discriminator into the given pointer-like value
+   to form a new discriminator.  Not all bits of the inputs are
+   guaranteed to contribute to the result.
+
+   On arm64e, the integer must fall within the range of a uint16_t;
+   other bits may be ignored.
+
+   The first argument must be an expression of pointer type.
+   The second argument must be an expression of integer type.
+   The result will have type uintptr_t. */
+#define ptrauth_blend_discriminator(__pointer, __integer)                      \
+  __builtin_ptrauth_blend_discriminator(__pointer, __integer)
+
+/* Add a signature to the given pointer value using a specific key,
+   using the given extra data as a salt to the signing process.
+
+   This operation does not authenticate the original value and is
+   therefore potentially insecure if an attacker could possibly
+   control that value.
+
+   The value must be an expression of pointer type.
+   The key must be a constant expression of type ptrauth_key.
+   The extra data must be an expression of pointer or integer type;
+   if an integer, it will be coerced to ptrauth_extra_data_t.
+   The result will have the same type as the original value. */
+#define ptrauth_sign_unauthenticated(__value, __key, __data)                   \
+  __builtin_ptrauth_sign_unauthenticated(__value, __key, __data)
+
+/* Authenticate a pointer using one scheme and resign it using another.
+
+   If the result is subsequently authenticated using the new scheme, that
+   authentication is guaranteed to fail if and only if the initial
+   authentication failed.
+
+   The value must be an expression of pointer type.
+   The key must be a constant expression of type ptrauth_key.
+   The extra data must be an expression of pointer or integer type;
+   if an integer, it will be coerced to ptrauth_extra_data_t.
+   The result will have the same type as the original value.
+
+   This operation is guaranteed to not leave the intermediate value
+   available for attack before it is re-signed.
+
+   Do not pass a null pointer to this function. A null pointer
+   will not successfully authenticate.
+
+   This operation traps if the authentication fails. */
+#define ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key,     \
+                                __new_data)                                    \
+  __builtin_ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
+                                    __new_data)
+
+/* Authenticate a data pointer.
+
+   The value must be an expression of non-function pointer type.
+   The key must be a constant expression of type ptrauth_key.
+   The extra data must be an expression of pointer or integer type;
+   if an integer, it will be coerced to ptrauth_extra_data_t.
+   The result will have the same type as the original value.
+
+   This operation traps if the authentication fails. */
+#define ptrauth_auth_data(__value, __old_key, __old_data)                      \
+  __builtin_ptrauth_auth(__value, __old_key, __old_data)
+
+/* Compute a signature for the given pair of pointer-sized values.
+   The order of the arguments is significant.
+
+   Like a pointer signature, the resulting signature depends on
+   private key data and therefore should not be reliably reproducible
+   by attackers.  That means that this can be used to validate the
+   integrity of arbitrary data by storing a signature for that data
+   alongside it, then checking that the signature is still valid later.
+   Data which exceeds two pointers in size can be signed by either
+   computing a tree of generic signatures or just signing an ordinary
+   cryptographic hash of the data.
+
+   The result has type ptrauth_generic_signature_t.  However, it may
+   not have as many bits of entropy as that type's width would suggest;
+   some implementations are known to compute a compressed signature as
+   if the arguments were a pointer and a discriminator.
+
+   The arguments must be either pointers or integers; if integers, they
+   will be coerce to uintptr_t. */
+#define ptrauth_sign_generic_data(__value, __data)                             \
+  __builtin_ptrauth_sign_generic_data(__value, __data)
+
+#else
+
+#define ptrauth_strip(__value, __key)                                          \
+  ({                                                                           \
+    (void)__key;                                                               \
+    __value;                                                                   \
+  })
+
+#define ptrauth_blend_discriminator(__pointer, __integer)                      \
+  ({                                                                           \
+    (void)__pointer;                                                           \
+    (void)__integer;                                                           \
+    ((ptrauth_extra_data_t)0);                                                 \
+  })
+
+#define ptrauth_sign_unauthenticated(__value, __key, __data)                   \
+  ({                                                                           \
+    (void)__key;                                                               \
+    (void)__data;                                                              \
+    __value;                                                                   \
+  })
+
+#define ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key,     \
+                                __new_data)                                    \
+  ({                                                                           \
+    (void)__old_key;                                                           \
+    (void)__old_data;                                                          \
+    (void)__new_key;                                                           \
+    (void)__new_data;                                                          \
+    __value;                                                                   \
+  })
+
+#define ptrauth_auth_data(__value, __old_key, __old_data)                      \
+  ({                                                                           \
+    (void)__old_key;                                                           \
+    (void)__old_data;                                                          \
+    __value;                                                                   \
+  })
+
+#define ptrauth_sign_generic_data(__value, __data)                             \
+  ({                                                                           \
+    (void)__value;                                                             \
+    (void)__data;                                                              \
+    ((ptrauth_generic_signature_t)0);                                          \
+  })
+
+#endif /* __has_feature(ptrauth_intrinsics) */
+
+#endif /* __PTRAUTH_H */
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b6..e303a7c 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1971,6 +1971,191 @@ static bool SemaOpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID,
   return false;
 }
 
+namespace {
+enum PointerAuthOpKind {
+  PAO_Strip,
+  PAO_Sign,
+  PAO_Auth,
+  PAO_SignGeneric,
+  PAO_Discriminator,
+  PAO_BlendPointer,
+  PAO_BlendInteger
+};
+}
+
+static bool checkPointerAuthEnabled(Sema &S, Expr *E) {
+  if (S.getLangOpts().PointerAuthIntrinsics)
+    return false;
+
+  S.Diag(E->getExprLoc(), diag::err_ptrauth_disabled) << E->getSourceRange();
+  return true;
+}
+
+static bool checkPointerAuthKey(Sema &S, Expr *&Arg) {
+  // Convert it to type 'int'.
+  if (convertArgumentToType(S, Arg, S.Context.IntTy))
+    return true;
+
+  // Value-dependent expressions are okay; wait for template instantiation.
+  if (Arg->isValueDependent())
+    return false;
+
+  unsigned KeyValue;
+  return S.checkConstantPointerAuthKey(Arg, KeyValue);
+}
+
+bool Sema::checkConstantPointerAuthKey(Expr *Arg, unsigned &Result) {
+  // Attempt to constant-evaluate the expression.
+  std::optional<llvm::APSInt> KeyValue = Arg->getIntegerConstantExpr(Context);
+  if (!KeyValue) {
+    Diag(Arg->getExprLoc(), diag::err_expr_not_ice)
+        << 0 << Arg->getSourceRange();
+    return true;
+  }
+
+  // Ask the target to validate the key parameter.
+  if (!Context.getTargetInfo().validatePointerAuthKey(*KeyValue)) {
+    llvm::SmallString<32> Value;
+    {
+      llvm::raw_svector_ostream Str(Value);
+      Str << *KeyValue;
+    }
+
+    Diag(Arg->getExprLoc(), diag::err_ptrauth_invalid_key)
+        << Value << Arg->getSourceRange();
+    return true;
+  }
+
+  Result = KeyValue->getZExtValue();
+  return false;
+}
+
+static bool checkPointerAuthValue(Sema &S, Expr *&Arg,
+                                  PointerAuthOpKind OpKind) {
+  if (Arg->hasPlaceholderType()) {
+    ExprResult R = S.CheckPlaceholderExpr(Arg);
+    if (R.isInvalid())
+      return true;
+    Arg = R.get();
+  }
+
+  auto AllowsPointer = [](PointerAuthOpKind OpKind) {
+    return OpKind != PAO_BlendInteger;
+  };
+  auto AllowsInteger = [](PointerAuthOpKind OpKind) {
+    return OpKind == PAO_Discriminator || OpKind == PAO_BlendInteger ||
+           OpKind == PAO_SignGeneric;
+  };
+
+  // Require the value to have the right range of type.
+  QualType ExpectedTy;
+  if (AllowsPointer(OpKind) && Arg->getType()->isPointerType()) {
+    ExpectedTy = Arg->getType().getUnqualifiedType();
+  } else if (AllowsPointer(OpKind) && Arg->getType()->isNullPtrType()) {
+    ExpectedTy = S.Context.VoidPtrTy;
+  } else if (AllowsInteger(OpKind) &&
+             Arg->getType()->isIntegralOrUnscopedEnumerationType()) {
+    ExpectedTy = S.Context.getUIntPtrType();
+
+  } else {
+    // Diagnose the failures.
+    S.Diag(Arg->getExprLoc(), diag::err_ptrauth_value_bad_type)
+        << unsigned(OpKind == PAO_Discriminator  ? 1
+                    : OpKind == PAO_BlendPointer ? 2
+                    : OpKind == PAO_BlendInteger ? 3
+                                                 : 0)
+        << unsigned(AllowsInteger(OpKind) ? (AllowsPointer(OpKind) ? 2 : 1) : 0)
+        << Arg->getType() << Arg->getSourceRange();
+    return true;
+  }
+
+  // Convert to that type.  This should just be an lvalue-to-rvalue
+  // conversion.
+  if (convertArgumentToType(S, Arg, ExpectedTy))
+    return true;
+
+  // Warn about null pointers for non-generic sign and auth operations.
+  if ((OpKind == PAO_Sign || OpKind == PAO_Auth) &&
+      Arg->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) {
+    S.Diag(Arg->getExprLoc(), OpKind == PAO_Sign
+                                  ? diag::warn_ptrauth_sign_null_pointer
+                                  : diag::warn_ptrauth_auth_null_pointer)
+        << Arg->getSourceRange();
+  }
+
+  return false;
+}
+
+static ExprResult SemaPointerAuthStrip(Sema &S, CallExpr *Call) {
+  if (checkArgCount(S, Call, 2))
+    return ExprError();
+  if (checkPointerAuthEnabled(S, Call))
+    return ExprError();
+  if (checkPointerAuthValue(S, Call->getArgs()[0], PAO_Strip) ||
+      checkPointerAuthKey(S, Call->getArgs()[1]))
+    return ExprError();
+
+  Call->setType(Call->getArgs()[0]->getType());
+  return Call;
+}
+
+static ExprResult SemaPointerAuthBlendDiscriminator(Sema &S, CallExpr *Call) {
+  if (checkArgCount(S, Call, 2))
+    return ExprError();
+  if (checkPointerAuthEnabled(S, Call))
+    return ExprError();
+  if (checkPointerAuthValue(S, Call->getArgs()[0], PAO_BlendPointer) ||
+      checkPointerAuthValue(S, Call->getArgs()[1], PAO_BlendInteger))
+    return ExprError();
+
+  Call->setType(S.Context.getUIntPtrType());
+  return Call;
+}
+
+static ExprResult SemaPointerAuthSignGenericData(Sema &S, CallExpr *Call) {
+  if (checkArgCount(S, Call, 2))
+    return ExprError();
+  if (checkPointerAuthEnabled(S, Call))
+    return ExprError();
+  if (checkPointerAuthValue(S, Call->getArgs()[0], PAO_SignGeneric) ||
+      checkPointerAuthValue(S, Call->getArgs()[1], PAO_Discriminator))
+    return ExprError();
+
+  Call->setType(S.Context.getUIntPtrType());
+  return Call;
+}
+
+static ExprResult SemaPointerAuthSignOrAuth(Sema &S, CallExpr *Call,
+                                            PointerAuthOpKind OpKind) {
+  if (checkArgCount(S, Call, 3))
+    return ExprError();
+  if (checkPointerAuthEnabled(S, Call))
+    return ExprError();
+  if (checkPointerAuthValue(S, Call->getArgs()[0], OpKind) ||
+      checkPointerAuthKey(S, Call->getArgs()[1]) ||
+      checkPointerAuthValue(S, Call->getArgs()[2], PAO_Discriminator))
+    return ExprError();
+
+  Call->setType(Call->getArgs()[0]->getType());
+  return Call;
+}
+
+static ExprResult SemaPointerAuthAuthAndResign(Sema &S, CallExpr *Call) {
+  if (checkArgCount(S, Call, 5))
+    return ExprError();
+  if (checkPointerAuthEnabled(S, Call))
+    return ExprError();
+  if (checkPointerAuthValue(S, Call->getArgs()[0], PAO_Auth) ||
+      checkPointerAuthKey(S, Call->getArgs()[1]) ||
+      checkPointerAuthValue(S, Call->getArgs()[2], PAO_Discriminator) ||
+      checkPointerAuthKey(S, Call->getArgs()[3]) ||
+      checkPointerAuthValue(S, Call->getArgs()[4], PAO_Discriminator))
+    return ExprError();
+
+  Call->setType(Call->getArgs()[0]->getType());
+  return Call;
+}
+
 static ExprResult SemaBuiltinLaunder(Sema &S, CallExpr *TheCall) {
   if (checkArgCount(S, TheCall, 1))
     return ExprError();
@@ -2683,6 +2868,18 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     }
     break;
   }
+  case Builtin::BI__builtin_ptrauth_strip:
+    return SemaPointerAuthStrip(*this, TheCall);
+  case Builtin::BI__builtin_ptrauth_blend_discriminator:
+    return SemaPointerAuthBlendDiscriminator(*this, TheCall);
+  case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
+    return SemaPointerAuthSignOrAuth(*this, TheCall, PAO_Sign);
+  case Builtin::BI__builtin_ptrauth_auth:
+    return SemaPointerAuthSignOrAuth(*this, TheCall, PAO_Auth);
+  case Builtin::BI__builtin_ptrauth_sign_generic_data:
+    return SemaPointerAuthSignGenericData(*this, TheCall);
+  case Builtin::BI__builtin_ptrauth_auth_and_resign:
+    return SemaPointerAuthAuthAndResign(*this, TheCall);
   // OpenCL v2.0, s6.13.16 - Pipe functions
   case Builtin::BIread_pipe:
   case Builtin::BIwrite_pipe:
@@ -5233,10 +5430,6 @@ bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
                            TheCall->getArg(1)->getEndLoc());
         retValue = true;
       }
-
-      if (!retValue)
-        TheCall->setType(VecTyA->getElementType());
-
       return retValue;
     }
   }
@@ -5250,11 +5443,12 @@ bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
   return true;
 }
 
-bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
-  QualType ExpectedType = S->Context.FloatTy;
+bool CheckArgsTypesAreCorrect(
+    Sema *S, CallExpr *TheCall, QualType ExpectedType,
+    llvm::function_ref<bool(clang::QualType PassedType)> Check) {
   for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) {
     QualType PassedType = TheCall->getArg(i)->getType();
-    if (!PassedType->hasFloatingRepresentation()) {
+    if (Check(PassedType)) {
       if (auto *VecTyA = PassedType->getAs<VectorType>())
         ExpectedType = S->Context.getVectorType(
             ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind());
@@ -5267,6 +5461,35 @@ bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
   return false;
 }
 
+bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
+  auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool {
+    return !PassedType->hasFloatingRepresentation();
+  };
+  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy,
+                                  checkAllFloatTypes);
+}
+
+bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
+  auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
+    clang::QualType BaseType =
+        PassedType->isVectorType()
+            ? PassedType->getAs<clang::VectorType>()->getElementType()
+            : PassedType;
+    return !BaseType->isHalfType() && !BaseType->isFloat32Type();
+  };
+  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy,
+                                  checkFloatorHalf);
+}
+
+void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
+                                QualType ReturnType) {
+  auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
+  if (VecTyA)
+    ReturnType = S->Context.getVectorType(ReturnType, VecTyA->getNumElements(),
+                                          VectorKind::Generic);
+  TheCall->setType(ReturnType);
+}
+
 // Note: returning true in this case results in CheckBuiltinFunctionCall
 // returning an ExprError
 bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -5285,12 +5508,27 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
-  case Builtin::BI__builtin_hlsl_elementwise_rcp:
+  case Builtin::BI__builtin_hlsl_elementwise_rcp: {
+    if (CheckAllArgsHaveFloatRepresentation(this, TheCall))
+      return true;
+    if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
+      return true;
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
+    if (CheckFloatOrHalfRepresentations(this, TheCall))
+      return true;
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
       return true;
-    if (CheckAllArgsHaveFloatRepresentation(this, TheCall))
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_elementwise_isinf: {
+    if (CheckFloatOrHalfRepresentations(this, TheCall))
       return true;
+    if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
+      return true;
+    SetElementTypeAsReturnType(this, TheCall, this->Context.BoolTy);
     break;
   }
   case Builtin::BI__builtin_hlsl_lerp: {
@@ -5300,7 +5538,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     if (SemaBuiltinElementwiseTernaryMath(TheCall))
       return true;
-    if (CheckAllArgsHaveFloatRepresentation(this, TheCall))
+    if (CheckFloatOrHalfRepresentations(this, TheCall))
       return true;
     break;
   }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 0cc0cba..e9ad7bb 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14366,7 +14366,8 @@ StmtResult Sema::ActOnOpenMPTargetParallelForSimdDirective(
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
-  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_parallel_for);
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_target_parallel_for_simd);
        ThisCaptureLevel > 1; --ThisCaptureLevel) {
     CS = cast<CapturedStmt>(CS->getCapturedStmt());
     // 1.2.2 OpenMP Language Terminology
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 8ef8bfd..dc97201 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1727,6 +1727,7 @@ Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) {
     assert(!Owner->isDependentContext());
     Inst->setLexicalDeclContext(Owner);
     RecordInst->setLexicalDeclContext(Owner);
+    Inst->setObjectOfFriendDecl();
 
     if (PrevClassTemplate) {
       Inst->setCommonPtr(PrevClassTemplate->getCommonPtr());
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index d04e1c7..86f64bf 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -281,11 +281,18 @@ bool clang::CanElideDeclDef(const Decl *D) {
 
     if (FD->isDependentContext())
       return false;
+
+    if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+      return false;
   }
 
   if (auto *VD = dyn_cast<VarDecl>(D)) {
     if (!VD->getDeclContext()->getRedeclContext()->isFileContext() ||
-        VD->isInline() || VD->isConstexpr() || isa<ParmVarDecl>(VD))
+        VD->isInline() || VD->isConstexpr() || isa<ParmVarDecl>(VD) ||
+        // Constant initialized variable may not affect the ABI, but they
+        // may be used in constant evaluation in the frontend, so we have
+        // to remain them.
+        VD->hasConstantInitialization())
       return false;
 
     if (VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
diff --git a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
index a803528..7042f1a 100644
--- a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
+++ b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
@@ -190,6 +190,17 @@ static bool isCapturedByReference(ExplodedNode *N, const DeclRefExpr *DR) {
   return FD->getType()->isReferenceType();
 }
 
+static bool isFoundInStmt(const Stmt *S, const VarDecl *VD) {
+  if (const DeclStmt *DS = dyn_cast<DeclStmt>(S)) {
+    for (const Decl *D : DS->decls()) {
+      // Once we reach the declaration of the VD we can return.
+      if (D->getCanonicalDecl() == VD)
+        return true;
+    }
+  }
+  return false;
+}
+
 // A loop counter is considered escaped if:
 // case 1: It is a global variable.
 // case 2: It is a reference parameter or a reference capture.
@@ -219,13 +230,19 @@ static bool isPossiblyEscaped(ExplodedNode *N, const DeclRefExpr *DR) {
       continue;
     }
 
-    if (const DeclStmt *DS = dyn_cast<DeclStmt>(S)) {
-      for (const Decl *D : DS->decls()) {
-        // Once we reach the declaration of the VD we can return.
-        if (D->getCanonicalDecl() == VD)
-          return false;
+    if (isFoundInStmt(S, VD)) {
+      return false;
+    }
+
+    if (const auto *SS = dyn_cast<SwitchStmt>(S)) {
+      if (const auto *CST = dyn_cast<CompoundStmt>(SS->getBody())) {
+        for (const Stmt *CB : CST->body()) {
+          if (isFoundInStmt(CB, VD))
+            return false;
+        }
       }
     }
+
     // Check the usage of the pass-by-ref function calls and adress-of operator
     // on VD and reference initialized by VD.
     ASTContext &ASTCtx =
diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp
index 4b112d7..2443992 100644
--- a/clang/test/AST/Interp/arrays.cpp
+++ b/clang/test/AST/Interp/arrays.cpp
@@ -429,18 +429,13 @@ namespace Incomplete {
                           // both-note {{array-to-pointer decay of array member without known bound}}
 
   /// These are from test/SemaCXX/constant-expression-cxx11.cpp
-  /// and are the only tests using the 'indexing of array without known bound' diagnostic.
-  /// We currently diagnose them differently.
-  extern int arr[]; // expected-note 3{{declared here}}
+  extern int arr[];
   constexpr int *c = &arr[1]; // both-error  {{must be initialized by a constant expression}} \
-                              // ref-note {{indexing of array without known bound}} \
-                              // expected-note {{read of non-constexpr variable 'arr'}}
+                              // both-note {{indexing of array without known bound}}
   constexpr int *d = &arr[1]; // both-error  {{must be initialized by a constant expression}} \
-                              // ref-note {{indexing of array without known bound}} \
-                              // expected-note {{read of non-constexpr variable 'arr'}}
+                              // both-note {{indexing of array without known bound}}
   constexpr int *e = arr + 1; // both-error  {{must be initialized by a constant expression}} \
-                              // ref-note {{indexing of array without known bound}} \
-                              // expected-note {{read of non-constexpr variable 'arr'}}
+                              // both-note {{indexing of array without known bound}}
 }
 
 namespace GH69115 {
diff --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp
index 08fca84..6c8df99 100644
--- a/clang/test/AST/Interp/builtin-functions.cpp
+++ b/clang/test/AST/Interp/builtin-functions.cpp
@@ -510,3 +510,12 @@ namespace bswap {
   int h4 = __builtin_bswap32(0x1234) == 0x34120000 ? 1 : f();
   int h5 = __builtin_bswap64(0x1234) == 0x3412000000000000 ? 1 : f();
 }
+
+#define CFSTR __builtin___CFStringMakeConstantString
+void test7(void) {
+  const void *X;
+  X = CFSTR("\242"); // both-warning {{input conversion stopped}}
+  X = CFSTR("\0"); // no-warning
+  X = CFSTR(242); // both-error {{cannot initialize a parameter of type 'const char *' with an rvalue of type 'int'}}
+  X = CFSTR("foo", "bar"); // both-error {{too many arguments to function call}}
+}
diff --git a/clang/test/AST/Interp/c.c b/clang/test/AST/Interp/c.c
index 8de6139..5caebd9 100644
--- a/clang/test/AST/Interp/c.c
+++ b/clang/test/AST/Interp/c.c
@@ -201,3 +201,11 @@ void localCompoundLiteral(void) {
        // pedantic-ref-warning {{use of an empty initializer}}
   };
 }
+
+/// struct copy
+struct StrA {int a; };
+const struct StrA sa = { 12 };
+const struct StrA * const sb = &sa;
+const struct StrA sc = *sb;
+_Static_assert(sc.a == 12, ""); // pedantic-ref-warning {{GNU extension}} \
+                                // pedantic-expected-warning {{GNU extension}}
diff --git a/clang/test/AST/Interp/c23.c b/clang/test/AST/Interp/c23.c
new file mode 100644
index 0000000..cf1bf4d
--- /dev/null
+++ b/clang/test/AST/Interp/c23.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -std=c23 -fexperimental-new-constant-interpreter -verify=expected,both %s
+// RUN: %clang_cc1 -std=c23 -verify=ref,both %s
+
+
+
+const _Bool inf1 =  (1.0/0.0 == __builtin_inf());
+constexpr _Bool inf2 = (1.0/0.0 == __builtin_inf()); // both-error {{must be initialized by a constant expression}} \
+                                                     // both-note {{division by zero}}
+constexpr _Bool inf3 = __builtin_inf() == __builtin_inf();
+
+
diff --git a/clang/test/AST/Interp/complex.c b/clang/test/AST/Interp/complex.c
index b5f30b8..a39f831 100644
--- a/clang/test/AST/Interp/complex.c
+++ b/clang/test/AST/Interp/complex.c
@@ -19,3 +19,12 @@ const _Complex float FC = {0.0f, 0.0f};
 _Static_assert(!FC, "");
 const _Complex float FI = {0, 0};
 _Static_assert(!FI, "");
+
+
+/// Make sure we're stripping the _Atomic part from the
+/// complex type.
+void testComplexFloat(_Atomic(_Complex float) *fp) {
+  _Atomic(_Complex float) x = 2.0f;
+  _Complex float f = *fp;
+  *fp = f;
+}
diff --git a/clang/test/AST/Interp/complex.cpp b/clang/test/AST/Interp/complex.cpp
index 6a42afc..d4e3d5a 100644
--- a/clang/test/AST/Interp/complex.cpp
+++ b/clang/test/AST/Interp/complex.cpp
@@ -313,3 +313,53 @@ namespace Cmp {
   static_assert((0.0 + 0.0j) > (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}}
   static_assert((0.0 + 0.0j) ^ (0.0 + 0.0j)); // both-error {{invalid operands to binary expression}}
 }
+
+/// From test/SemaCXX/constant-expression-cxx11.cpp
+///
+/// Some of the diagnostics we emit are different than the one of the
+/// current interpreter.
+///
+/// FIXME: For the '&test3 + 1' test, we are _not_ creating an explicit pointer variable
+/// anywhere and so the &test3+1 is the same as __imag(test3) for us.
+namespace ComplexConstexpr {
+  constexpr _Complex float test1 = {};
+  constexpr _Complex float test2 = {1};
+  constexpr _Complex double test3 = {1,2};
+  constexpr _Complex int test4 = {4};
+  constexpr _Complex int test5 = 4;
+  constexpr _Complex int test6 = {5,6};
+  typedef _Complex float fcomplex;
+  constexpr fcomplex test7 = fcomplex();
+
+  constexpr const double &t2r = __real test3;
+  constexpr const double &t2i = __imag test3;
+  static_assert(&t2r + 1 == &t2i, "");
+  static_assert(t2r == 1.0, "");
+  static_assert(t2i == 2.0, "");
+  constexpr const double *t2p = &t2r;
+  static_assert(t2p[-1] == 0.0, ""); // both-error {{constant expr}} \
+                                     // both-note {{cannot refer to element -1 of array of 2 elements}}
+  static_assert(t2p[0] == 1.0, "");
+  static_assert(t2p[1] == 2.0, "");
+  static_assert(t2p[2] == 0.0, ""); // both-error {{constant expr}} \
+                                    // both-note {{one-past-the-end pointer}}
+  static_assert(t2p[3] == 0.0, ""); // both-error {{constant expr}} \
+                                    // both-note {{cannot refer to element 3 of array of 2 elements}}
+  constexpr _Complex float *p = 0;
+  constexpr float pr = __real *p; // both-error {{constant expr}} \
+                                  // ref-note {{cannot access real component of null}} \
+                                  // expected-note {{read of dereferenced null pointer}}
+  constexpr float pi = __imag *p; // both-error {{constant expr}} \
+                                  // ref-note {{cannot access imaginary component of null}} \
+                                  // expected-note {{cannot perform pointer arithmetic on null pointer}}
+  constexpr const _Complex double *q = &test3 + 1;
+  constexpr double qr = __real *q; // ref-error {{constant expr}} \
+                                   // ref-note {{cannot access real component of pointer past the end}}
+  constexpr double qi = __imag *q; // both-error {{constant expr}} \
+                                   // ref-note {{cannot access imaginary component of pointer past the end}} \
+                                   // expected-note {{read of dereferenced one-past-the-end pointer}}
+
+  static_assert(__real test6 == 5, "");
+  static_assert(__imag test6 == 6, "");
+  static_assert(&__imag test6 == &__real test6 + 1, "");
+}
diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp
index 127b589..042e296 100644
--- a/clang/test/AST/Interp/cxx23.cpp
+++ b/clang/test/AST/Interp/cxx23.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all,all-20 %s
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all,all20 %s
 // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref23,all %s
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all-20 %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter
 // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23,all %s -fexperimental-new-constant-interpreter
 
 /// FIXME: The new interpreter is missing all the 'control flows through...' diagnostics.
@@ -108,9 +108,9 @@ namespace StaticOperators {
   static_assert(f2() == 3);
 
   struct S1 {
-    constexpr S1() { // all-20-error {{never produces a constant expression}}
+    constexpr S1() { // all20-error {{never produces a constant expression}}
       throw; // all-note {{not valid in a constant expression}} \
-             // all-20-note {{not valid in a constant expression}}
+             // all20-note {{not valid in a constant expression}}
     }
     static constexpr int operator()() { return 3; } // ref20-warning {{C++23 extension}} \
                                                     // expected20-warning {{C++23 extension}}
@@ -121,3 +121,28 @@ namespace StaticOperators {
 
 
 }
+
+int test_in_lambdas() {
+  auto c = [](int n) constexpr {
+    if (n == 0)
+      return 0;
+    else
+      goto test; // all-note {{subexpression not valid in a constant expression}} \
+                 // all20-warning {{use of this statement in a constexpr function is a C++23 extension}}
+  test:
+    return 1;
+  };
+  c(0);
+  constexpr auto A = c(1); // all-error {{must be initialized by a constant expression}} \
+                           // all-note {{in call to}}
+  return 0;
+}
+
+/// PackIndexExpr.
+template <auto... p>
+struct check_ice {
+    enum e {
+        x = p...[0] // all-warning {{is a C++2c extension}}
+    };
+};
+static_assert(check_ice<42>::x == 42);
diff --git a/clang/test/AST/Interp/cxx98.cpp b/clang/test/AST/Interp/cxx98.cpp
index 73e4537..ba6bcd9 100644
--- a/clang/test/AST/Interp/cxx98.cpp
+++ b/clang/test/AST/Interp/cxx98.cpp
@@ -18,13 +18,12 @@ template struct C<cval>;
 
 /// FIXME: This example does not get properly diagnosed in the new interpreter.
 extern const int recurse1;
-const int recurse2 = recurse1; // both-note {{declared here}}
+const int recurse2 = recurse1; // ref-note {{declared here}}
 const int recurse1 = 1;
 int array1[recurse1];
 int array2[recurse2]; // ref-warning 2{{variable length array}} \
                       // ref-note {{initializer of 'recurse2' is not a constant expression}} \
                       // expected-warning {{variable length array}} \
-                      // expected-note {{read of non-const variable 'recurse2'}} \
                       // expected-error {{variable length array}}
 
 int NCI; // both-note {{declared here}}
@@ -39,3 +38,10 @@ struct V {
                          // both-error {{constructor cannot have a return type}}
 };
 _Static_assert(V().c[0], ""); // both-error {{is not an integral constant expression}}
+
+struct C0 {
+  template<typename U> static U Data; // both-warning {{C++14 extension}}
+  template<typename U> static const U Data<U*> = U();
+};
+const int c0_test = C0::Data<int*>;
+_Static_assert(c0_test == 0, "");
diff --git a/clang/test/AST/Interp/if.cpp b/clang/test/AST/Interp/if.cpp
index 86ae8de6..37289d6 100644
--- a/clang/test/AST/Interp/if.cpp
+++ b/clang/test/AST/Interp/if.cpp
@@ -1,8 +1,5 @@
-// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fexperimental-new-constant-interpreter %s -verify
-// RUN: %clang_cc1 -std=c++23 -fsyntax-only %s -verify=ref
-
-// expected-no-diagnostics
-// ref-no-diagnostics
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fexperimental-new-constant-interpreter %s -verify=expected,both
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only %s -verify=ref,both
 
 namespace ConstEval {
   constexpr int f() {
@@ -51,3 +48,13 @@ namespace InitDecl {
   }
   static_assert(attrs() == 1, "");
 };
+
+/// The faulty if statement creates a RecoveryExpr with contains-errors,
+/// but the execution will never reach that.
+constexpr char g(char const (&x)[2]) {
+    return 'x';
+  if (auto [a, b] = x) // both-error {{an array type is not allowed here}} \
+                       // both-warning {{ISO C++17 does not permit structured binding declaration in a condition}}
+    ;
+}
+static_assert(g("x") == 'x');
diff --git a/clang/test/AST/Interp/literals.cpp b/clang/test/AST/Interp/literals.cpp
index 7ae8499..0a9580b 100644
--- a/clang/test/AST/Interp/literals.cpp
+++ b/clang/test/AST/Interp/literals.cpp
@@ -1113,6 +1113,9 @@ namespace InvalidDeclRefs {
   int b03 = 3; // both-note {{declared here}}
   static_assert(b03, ""); // both-error {{not an integral constant expression}} \
                           // both-note {{read of non-const variable}}
+
+  extern int var;
+  constexpr int *varp = &var; // Ok.
 }
 
 namespace NonConstReads {
@@ -1182,8 +1185,16 @@ namespace incdecbool {
 }
 
 #if __cplusplus >= 201402L
+/// NOTE: The diagnostics of the two interpreters are a little
+/// different here, but they both make sense.
 constexpr int externvar1() { // both-error {{never produces a constant expression}}
-  extern char arr[]; // both-note {{declared here}}
-  return arr[0]; // both-note {{read of non-constexpr variable 'arr'}}
+  extern char arr[]; // ref-note {{declared here}}
+   return arr[0]; // ref-note {{read of non-constexpr variable 'arr'}} \
+                  // expected-note {{array-to-pointer decay of array member without known bound is not supported}}
 }
 #endif
+
+namespace Extern {
+  constexpr extern char Oops = 1;
+  static_assert(Oops == 1, "");
+}
diff --git a/clang/test/AST/Interp/ms.cpp b/clang/test/AST/Interp/ms.cpp
new file mode 100644
index 0000000..99716e9
--- /dev/null
+++ b/clang/test/AST/Interp/ms.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -verify=ref,both %s -fms-extensions
+// RUN: %clang_cc1 -verify=expected,both %s -fexperimental-new-constant-interpreter -fms-extensions
+
+// ref-no-diagnostics
+// expected-no-diagnostics
+
+/// Used to assert because the two parameters to _rotl do not have the same type.
+static_assert(_rotl(0x01, 5) == 32);
diff --git a/clang/test/AST/Interp/records.cpp b/clang/test/AST/Interp/records.cpp
index 7ddc56c..769e48f 100644
--- a/clang/test/AST/Interp/records.cpp
+++ b/clang/test/AST/Interp/records.cpp
@@ -1238,3 +1238,9 @@ namespace InvalidCtorInitializer {
   // no crash on evaluating the constexpr ctor.
   constexpr int Z = X().Y; // both-error {{constexpr variable 'Z' must be initialized by a constant expression}}
 }
+
+extern int f(); // both-note {{here}}
+struct HasNonConstExprMemInit {
+  int x = f(); // both-note {{non-constexpr function}}
+  constexpr HasNonConstExprMemInit() {} // both-error {{never produces a constant expression}}
+};
diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp
index fc1fb06..66a828a 100644
--- a/clang/test/Analysis/loop-unrolling.cpp
+++ b/clang/test/Analysis/loop-unrolling.cpp
@@ -547,3 +547,15 @@ void capture_implicitly_by_ref_as_loop_counter() {
     }
   };
 }
+
+
+void test_escaping_on_var_before_switch_case_no_crash(int c) {
+  // https://github.com/llvm/llvm-project/issues/68819
+  switch (c) {
+    int i; // no-crash: The declaration of `i` is found here.
+    case 0: {
+      for (i = 0; i < 16; i++) {}
+      break;
+    }
+  }
+}
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 05ea1c7..8bd68d9 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -156,4 +156,10 @@ int rndrrs(uint64_t *__addr) {
   return __builtin_arm_rndrrs(__addr);
 }
 
+// CHECK-LABEL: @trap(
+// CHECK: call void @llvm.aarch64.break(i32 42)
+void trap() {
+  __builtin_arm_trap(42);
+}
+
 // CHECK: ![[M0]] = !{!"1:2:3:4:5"}
diff --git a/clang/test/CodeGen/ptrauth-intrinsics.c b/clang/test/CodeGen/ptrauth-intrinsics.c
new file mode 100644
index 0000000..17f28dd
--- /dev/null
+++ b/clang/test/CodeGen/ptrauth-intrinsics.c
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-intrinsics -emit-llvm %s  -o - | FileCheck %s
+
+void (*fnptr)(void);
+long int_discriminator;
+void *ptr_discriminator;
+long signature;
+
+// CHECK-LABEL: define void @test_auth()
+void test_auth() {
+  // CHECK:      [[PTR:%.*]] = load ptr, ptr @fnptr,
+  // CHECK-NEXT: [[DISC0:%.*]] = load ptr, ptr @ptr_discriminator,
+  // CHECK-NEXT: [[T0:%.*]] = ptrtoint ptr [[PTR]] to i64
+  // CHECK-NEXT: [[DISC:%.*]] = ptrtoint ptr [[DISC0]] to i64
+  // CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[T0]], i32 0, i64 [[DISC]])
+  // CHECK-NEXT: [[RESULT:%.*]] = inttoptr  i64 [[T1]] to ptr
+  // CHECK-NEXT: store ptr [[RESULT]], ptr @fnptr,
+  fnptr = __builtin_ptrauth_auth(fnptr, 0, ptr_discriminator);
+}
+
+// CHECK-LABEL: define void @test_strip()
+void test_strip() {
+  // CHECK:      [[PTR:%.*]] = load ptr, ptr @fnptr,
+  // CHECK-NEXT: [[T0:%.*]] = ptrtoint ptr [[PTR]] to i64
+  // CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.ptrauth.strip(i64 [[T0]], i32 0)
+  // CHECK-NEXT: [[RESULT:%.*]] = inttoptr  i64 [[T1]] to ptr
+  // CHECK-NEXT: store ptr [[RESULT]], ptr @fnptr,
+  fnptr = __builtin_ptrauth_strip(fnptr, 0);
+}
+
+// CHECK-LABEL: define void @test_sign_unauthenticated()
+void test_sign_unauthenticated() {
+  // CHECK:      [[PTR:%.*]] = load ptr, ptr @fnptr,
+  // CHECK-NEXT: [[DISC0:%.*]] = load ptr, ptr @ptr_discriminator,
+  // CHECK-NEXT: [[T0:%.*]] = ptrtoint ptr [[PTR]] to i64
+  // CHECK-NEXT: [[DISC:%.*]] = ptrtoint ptr [[DISC0]] to i64
+  // CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[T0]], i32 0, i64 [[DISC]])
+  // CHECK-NEXT: [[RESULT:%.*]] = inttoptr  i64 [[T1]] to ptr
+  // CHECK-NEXT: store ptr [[RESULT]], ptr @fnptr,
+  fnptr = __builtin_ptrauth_sign_unauthenticated(fnptr, 0, ptr_discriminator);
+}
+
+// CHECK-LABEL: define void @test_auth_and_resign()
+void test_auth_and_resign() {
+  // CHECK:      [[PTR:%.*]] = load ptr, ptr @fnptr,
+  // CHECK-NEXT: [[DISC0:%.*]] = load ptr, ptr @ptr_discriminator,
+  // CHECK-NEXT: [[T0:%.*]] = ptrtoint ptr [[PTR]] to i64
+  // CHECK-NEXT: [[DISC:%.*]] = ptrtoint ptr [[DISC0]] to i64
+  // CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.ptrauth.resign(i64 [[T0]], i32 0, i64 [[DISC]], i32 3, i64 15)
+  // CHECK-NEXT: [[RESULT:%.*]] = inttoptr  i64 [[T1]] to ptr
+  // CHECK-NEXT: store ptr [[RESULT]], ptr @fnptr,
+  fnptr = __builtin_ptrauth_auth_and_resign(fnptr, 0, ptr_discriminator, 3, 15);
+}
+
+// CHECK-LABEL: define void @test_blend_discriminator()
+void test_blend_discriminator() {
+  // CHECK:      [[PTR:%.*]] = load ptr, ptr @fnptr,
+  // CHECK-NEXT: [[DISC:%.*]] = load i64, ptr @int_discriminator,
+  // CHECK-NEXT: [[T0:%.*]] = ptrtoint ptr [[PTR]] to i64
+  // CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[T0]], i64 [[DISC]])
+  // CHECK-NEXT: store i64 [[RESULT]], ptr @int_discriminator,
+  int_discriminator = __builtin_ptrauth_blend_discriminator(fnptr, int_discriminator);
+}
+
+// CHECK-LABEL: define void @test_sign_generic_data()
+void test_sign_generic_data() {
+  // CHECK:      [[PTR:%.*]] = load ptr, ptr @fnptr,
+  // CHECK-NEXT: [[DISC0:%.*]] = load ptr, ptr @ptr_discriminator,
+  // CHECK-NEXT: [[T0:%.*]] = ptrtoint ptr [[PTR]] to i64
+  // CHECK-NEXT: [[DISC:%.*]] = ptrtoint ptr [[DISC0]] to i64
+  // CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.ptrauth.sign.generic(i64 [[T0]], i64 [[DISC]])
+  // CHECK-NEXT: store i64 [[RESULT]], ptr @signature,
+  signature = __builtin_ptrauth_sign_generic_data(fnptr, ptr_discriminator);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/isinf.hlsl b/clang/test/CodeGenHLSL/builtins/isinf.hlsl
new file mode 100644
index 0000000..df44fc4
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/isinf.hlsl
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// CHECK: define noundef i1 @
+// NATIVE_HALF: %dx.isinf = call i1 @llvm.dx.isinf.f16(
+// NO_HALF: %dx.isinf = call i1 @llvm.dx.isinf.f32(
+// CHECK: ret i1 %dx.isinf
+bool test_isinf_half(half p0) { return isinf(p0); }
+// CHECK: define noundef <2 x i1> @
+// NATIVE_HALF: %dx.isinf = call <2 x i1> @llvm.dx.isinf.v2f16
+// NO_HALF: %dx.isinf = call <2 x i1> @llvm.dx.isinf.v2f32(
+// CHECK: ret <2 x i1> %dx.isinf
+bool2 test_isinf_half2(half2 p0) { return isinf(p0); }
+// NATIVE_HALF: define noundef <3 x i1> @
+// NATIVE_HALF: %dx.isinf = call <3 x i1> @llvm.dx.isinf.v3f16
+// NO_HALF: %dx.isinf = call <3 x i1> @llvm.dx.isinf.v3f32(
+// CHECK: ret <3 x i1> %dx.isinf
+bool3 test_isinf_half3(half3 p0) { return isinf(p0); }
+// NATIVE_HALF: define noundef <4 x i1> @
+// NATIVE_HALF: %dx.isinf = call <4 x i1> @llvm.dx.isinf.v4f16
+// NO_HALF: %dx.isinf = call <4 x i1> @llvm.dx.isinf.v4f32(
+// CHECK: ret <4 x i1> %dx.isinf
+bool4 test_isinf_half4(half4 p0) { return isinf(p0); }
+
+// CHECK: define noundef i1 @
+// CHECK: %dx.isinf = call i1 @llvm.dx.isinf.f32(
+// CHECK: ret i1 %dx.isinf
+bool test_isinf_float(float p0) { return isinf(p0); }
+// CHECK: define noundef <2 x i1> @
+// CHECK: %dx.isinf = call <2 x i1> @llvm.dx.isinf.v2f32
+// CHECK: ret <2 x i1> %dx.isinf
+bool2 test_isinf_float2(float2 p0) { return isinf(p0); }
+// CHECK: define noundef <3 x i1> @
+// CHECK: %dx.isinf = call <3 x i1> @llvm.dx.isinf.v3f32
+// CHECK: ret <3 x i1> %dx.isinf
+bool3 test_isinf_float3(float3 p0) { return isinf(p0); }
+// CHECK: define noundef <4 x i1> @
+// CHECK: %dx.isinf = call <4 x i1> @llvm.dx.isinf.v4f32
+// CHECK: ret <4 x i1> %dx.isinf
+bool4 test_isinf_float4(float4 p0) { return isinf(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl
index 1f16dec..2fd5a19 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl
@@ -1,27 +1,5 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-
-
-// CHECK-LABEL: builtin_lerp_half_scalar
-// CHECK: %3 = fsub double %conv1, %conv
-// CHECK: %4 = fmul double %conv2, %3
-// CHECK: %dx.lerp = fadd double %conv, %4
-// CHECK: %conv3 = fptrunc double %dx.lerp to half
-// CHECK: ret half %conv3
-half builtin_lerp_half_scalar (half p0) {
-  return __builtin_hlsl_lerp ( p0, p0, p0 );
-}
-
-// CHECK-LABEL: builtin_lerp_float_scalar
-// CHECK: %3 = fsub double %conv1, %conv
-// CHECK: %4 = fmul double %conv2, %3
-// CHECK: %dx.lerp = fadd double %conv, %4
-// CHECK: %conv3 = fptrunc double %dx.lerp to float
-// CHECK: ret float %conv3
-float builtin_lerp_float_scalar ( float p0) {
-  return __builtin_hlsl_lerp ( p0, p0, p0 );
-}
-
 // CHECK-LABEL: builtin_lerp_half_vector
 // CHECK: %dx.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
 // CHECK: ret <3 x half> %dx.lerp
diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl
index a6b3d96..49cd04a 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl
@@ -6,13 +6,10 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF: %3 = fsub half %1, %0
-// NATIVE_HALF: %4 = fmul half %2, %3
-// NATIVE_HALF: %dx.lerp = fadd half %0, %4
+
+// NATIVE_HALF: %dx.lerp = call half @llvm.dx.lerp.f16(half %0, half %1, half %2)
 // NATIVE_HALF: ret half %dx.lerp
-// NO_HALF: %3 = fsub float %1, %0
-// NO_HALF: %4 = fmul float %2, %3
-// NO_HALF: %dx.lerp = fadd float %0, %4
+// NO_HALF: %dx.lerp = call float @llvm.dx.lerp.f32(float %0, float %1, float %2)
 // NO_HALF: ret float %dx.lerp
 half test_lerp_half(half p0) { return lerp(p0, p0, p0); }
 
@@ -20,37 +17,35 @@ half test_lerp_half(half p0) { return lerp(p0, p0, p0); }
 // NATIVE_HALF: ret <2 x half> %dx.lerp
 // NO_HALF: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
 // NO_HALF: ret <2 x float> %dx.lerp
-half2 test_lerp_half2(half2 p0, half2 p1) { return lerp(p0, p0, p0); }
+half2 test_lerp_half2(half2 p0) { return lerp(p0, p0, p0); }
 
 // NATIVE_HALF: %dx.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
 // NATIVE_HALF: ret <3 x half> %dx.lerp
 // NO_HALF: %dx.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
 // NO_HALF: ret <3 x float> %dx.lerp
-half3 test_lerp_half3(half3 p0, half3 p1) { return lerp(p0, p0, p0); }
+half3 test_lerp_half3(half3 p0) { return lerp(p0, p0, p0); }
 
 // NATIVE_HALF: %dx.lerp = call <4 x half> @llvm.dx.lerp.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2)
 // NATIVE_HALF: ret <4 x half> %dx.lerp
 // NO_HALF: %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
 // NO_HALF: ret <4 x float> %dx.lerp
-half4 test_lerp_half4(half4 p0, half4 p1) { return lerp(p0, p0, p0); }
+half4 test_lerp_half4(half4 p0) { return lerp(p0, p0, p0); }
 
-// CHECK: %3 = fsub float %1, %0
-// CHECK: %4 = fmul float %2, %3
-// CHECK: %dx.lerp = fadd float %0, %4
+// CHECK: %dx.lerp = call float @llvm.dx.lerp.f32(float %0, float %1, float %2)
 // CHECK: ret float %dx.lerp
-float test_lerp_float(float p0, float p1) { return lerp(p0, p0, p0); }
+float test_lerp_float(float p0) { return lerp(p0, p0, p0); }
 
 // CHECK: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
 // CHECK: ret <2 x float> %dx.lerp
-float2 test_lerp_float2(float2 p0, float2 p1) { return lerp(p0, p0, p0); }
+float2 test_lerp_float2(float2 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: %dx.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
 // CHECK: ret <3 x float> %dx.lerp
-float3 test_lerp_float3(float3 p0, float3 p1) { return lerp(p0, p0, p0); }
+float3 test_lerp_float3(float3 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
 // CHECK: ret <4 x float> %dx.lerp
-float4 test_lerp_float4(float4 p0, float4 p1) { return lerp(p0, p0, p0); }
+float4 test_lerp_float4(float4 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %splat.splat, <2 x float> %1, <2 x float> %2)
 // CHECK: ret <2 x float> %dx.lerp
diff --git a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
new file mode 100644
index 0000000..c87a8c4
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: %dx.rsqrt = call half @llvm.dx.rsqrt.f16(
+// NATIVE_HALF: ret half %dx.rsqrt
+// NO_HALF: define noundef float @"?test_rsqrt_half@@YA$halff@$halff@@Z"(
+// NO_HALF: %dx.rsqrt = call float @llvm.dx.rsqrt.f32(
+// NO_HALF: ret float %dx.rsqrt
+half test_rsqrt_half(half p0) { return rsqrt(p0); }
+// NATIVE_HALF: define noundef <2 x half> @
+// NATIVE_HALF: %dx.rsqrt = call <2 x half> @llvm.dx.rsqrt.v2f16
+// NATIVE_HALF: ret <2 x half> %dx.rsqrt
+// NO_HALF: define noundef <2 x float> @
+// NO_HALF: %dx.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32(
+// NO_HALF: ret <2 x float> %dx.rsqrt
+half2 test_rsqrt_half2(half2 p0) { return rsqrt(p0); }
+// NATIVE_HALF: define noundef <3 x half> @
+// NATIVE_HALF: %dx.rsqrt = call <3 x half> @llvm.dx.rsqrt.v3f16
+// NATIVE_HALF: ret <3 x half> %dx.rsqrt
+// NO_HALF: define noundef <3 x float> @
+// NO_HALF: %dx.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32(
+// NO_HALF: ret <3 x float> %dx.rsqrt
+half3 test_rsqrt_half3(half3 p0) { return rsqrt(p0); }
+// NATIVE_HALF: define noundef <4 x half> @
+// NATIVE_HALF: %dx.rsqrt = call <4 x half> @llvm.dx.rsqrt.v4f16
+// NATIVE_HALF: ret <4 x half> %dx.rsqrt
+// NO_HALF: define noundef <4 x float> @
+// NO_HALF: %dx.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32(
+// NO_HALF: ret <4 x float> %dx.rsqrt
+half4 test_rsqrt_half4(half4 p0) { return rsqrt(p0); }
+
+// CHECK: define noundef float @
+// CHECK: %dx.rsqrt = call float @llvm.dx.rsqrt.f32(
+// CHECK: ret float %dx.rsqrt
+float test_rsqrt_float(float p0) { return rsqrt(p0); }
+// CHECK: define noundef <2 x float> @
+// CHECK: %dx.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32
+// CHECK: ret <2 x float> %dx.rsqrt
+float2 test_rsqrt_float2(float2 p0) { return rsqrt(p0); }
+// CHECK: define noundef <3 x float> @
+// CHECK: %dx.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32
+// CHECK: ret <3 x float> %dx.rsqrt
+float3 test_rsqrt_float3(float3 p0) { return rsqrt(p0); }
+// CHECK: define noundef <4 x float> @
+// CHECK: %dx.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32
+// CHECK: ret <4 x float> %dx.rsqrt
+float4 test_rsqrt_float4(float4 p0) { return rsqrt(p0); }
diff --git a/clang/test/Driver/apple-kext-mkernel.c b/clang/test/Driver/apple-kext-mkernel.c
index f03ed4a..de905e1 100644
--- a/clang/test/Driver/apple-kext-mkernel.c
+++ b/clang/test/Driver/apple-kext-mkernel.c
@@ -13,8 +13,8 @@
 // CHECK-X86-2: "-fno-rtti"
 // CHECK-X86-2-NOT: "-fno-common"
 
-// RUN: not %clang -target x86_64-apple-darwin11 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
-// RUN: not %clang -target x86_64-apple-darwin11 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only -fbuiltin -fno-builtin -fcommon -fno-common %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
+// RUN: %clang --target=x86_64-apple-darwin11 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
+// RUN: %clang --target=x86_64-apple-darwin11 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only -fbuiltin -fno-builtin -fcommon -fno-common %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
 
 // CHECK-ARM: "-target-feature" "+long-calls"
 // CHECK-ARM: "-target-feature" "+strict-align"
diff --git a/clang/test/Driver/clang-g-opts.c b/clang/test/Driver/clang-g-opts.c
index b73602a..fdbe0b9 100644
--- a/clang/test/Driver/clang-g-opts.c
+++ b/clang/test/Driver/clang-g-opts.c
@@ -42,8 +42,3 @@
 
 // CHECK-WITH-G-STANDALONE: "-debug-info-kind=standalone"
 // CHECK-WITH-G-STANDALONE: "-dwarf-version=2"
-
-/// TODO: Special case before R_RISCV_SET_ULEB128 linker support becomes more widely available.
-// RUN: %clang -### -S %s -g --target=riscv64-linux-gnu 2>&1 | FileCheck --check-prefix=VERSION4 %s
-// RUN: %clang -### -S %s -g --target=riscv64-unknown-elf 2>&1 | FileCheck --check-prefix=VERSION4 %s
-// VERSION4: "-dwarf-version=4"
diff --git a/clang/test/Driver/loongarch-munaligned-access.c b/clang/test/Driver/loongarch-munaligned-access.c
index 44edb2e..a545679 100644
--- a/clang/test/Driver/loongarch-munaligned-access.c
+++ b/clang/test/Driver/loongarch-munaligned-access.c
@@ -1,54 +1,25 @@
 /// Test -m[no-]unaligned-access and -m[no-]strict-align options.
 
-// RUN: %clang --target=loongarch64 -munaligned-access -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-NO-UNALIGNED
 // RUN: %clang --target=loongarch64 -mstrict-align -fsyntax-only %s -### 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=CC1-NO-UNALIGNED
 // RUN: %clang --target=loongarch64 -mno-strict-align -fsyntax-only %s -### 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=CC1-UNALIGNED
-// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-NO-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-UNALIGNED
 // RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -fsyntax-only %s -### 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=CC1-UNALIGNED
 // RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=CC1-NO-UNALIGNED
-// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-NO-UNALIGNED
-// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CC1-NO-UNALIGNED
 
-// RUN: %clang --target=loongarch64 -munaligned-access -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-unaligned-access -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-NO-UNALIGNED
 // RUN: %clang --target=loongarch64 -mstrict-align -S -emit-llvm %s -o - | \
 // RUN:   FileCheck %s --check-prefix=IR-NO-UNALIGNED
 // RUN: %clang --target=loongarch64 -mno-strict-align -S -emit-llvm %s -o - | \
 // RUN:   FileCheck %s --check-prefix=IR-UNALIGNED
-// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-NO-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-UNALIGNED
 // RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -S -emit-llvm %s -o - | \
 // RUN:   FileCheck %s --check-prefix=IR-UNALIGNED
 // RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -S -emit-llvm %s -o - | \
 // RUN:   FileCheck %s --check-prefix=IR-NO-UNALIGNED
-// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-NO-UNALIGNED
-// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-UNALIGNED
-// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -S -emit-llvm %s -o - | \
-// RUN:   FileCheck %s --check-prefix=IR-NO-UNALIGNED
+
+// RUN: not %clang -### --target=loongarch64 -mno-unaligned-access -munaligned-access %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=ERR
 
 // CC1-UNALIGNED: "-target-feature" "+ual"
 // CC1-NO-UNALIGNED: "-target-feature" "-ual"
@@ -56,6 +27,9 @@
 // IR-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+ual{{(,.*)?}}"
 // IR-NO-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-ual{{(,.*)?}}"
 
+// ERR: error: unsupported option '-mno-unaligned-access' for target 'loongarch64'
+// ERR: error: unsupported option '-munaligned-access' for target 'loongarch64'
+
 int foo(void) {
   return 3;
 }
diff --git a/clang/test/Driver/munaligned-access-unused.c b/clang/test/Driver/munaligned-access-unused.c
index 1d86edb..4060b53 100644
--- a/clang/test/Driver/munaligned-access-unused.c
+++ b/clang/test/Driver/munaligned-access-unused.c
@@ -1,8 +1,9 @@
-/// Check -m[no-]unaligned-access and -m[no-]strict-align are warned unused on a target that does not support them.
+/// Check -m[no-]unaligned-access and -m[no-]strict-align are errored on a target that does not support them.
 
 // RUN: not %clang --target=x86_64 -munaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=unaligned-access
 // RUN: not %clang --target=x86_64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-unaligned-access
-// RUN: not %clang --target=x86_64 -mstrict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=strict-align
-// RUN: not %clang --target=x86_64 -mno-strict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-strict-align
+// RUN: not %clang --target=x86_64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | FileCheck %s --check-prefix=ALIGN
 
 // CHECK: error: unsupported option '-m{{(no-)?}}unaligned-access' for target '{{.*}}'
+// ALIGN: error: unsupported option '-mno-strict-align' for target '{{.*}}'
+// ALIGN: error: unsupported option '-mstrict-align' for target '{{.*}}'
diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c
index fc5fb0f..fe74ac7 100644
--- a/clang/test/Driver/riscv-features.c
+++ b/clang/test/Driver/riscv-features.c
@@ -33,8 +33,6 @@
 // NO-FORCE-SW-SCS: "-target-feature" "-forced-sw-shadow-stack"
 // DEFAULT-NOT: "-target-feature" "+forced-sw-shadow-stack"
 
-// RUN: %clang --target=riscv32-unknown-elf -### %s -munaligned-access 2>&1 | FileCheck %s -check-prefix=FAST-UNALIGNED-ACCESS
-// RUN: %clang --target=riscv32-unknown-elf -### %s -mno-unaligned-access 2>&1 | FileCheck %s -check-prefix=NO-FAST-UNALIGNED-ACCESS
 // RUN: %clang --target=riscv32-unknown-elf -### %s -mno-strict-align 2>&1 | FileCheck %s -check-prefix=FAST-UNALIGNED-ACCESS
 // RUN: %clang --target=riscv32-unknown-elf -### %s -mstrict-align 2>&1 | FileCheck %s -check-prefix=NO-FAST-UNALIGNED-ACCESS
 
diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c
index f73915b..a808dd8 100644
--- a/clang/test/Driver/tls-dialect.c
+++ b/clang/test/Driver/tls-dialect.c
@@ -2,6 +2,7 @@
 // RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
 // RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
 // RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu %s 2>&1 | FileCheck --check-prefix=NODESC %s
+// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=DESC %s
 
 /// Android supports TLSDESC by default on RISC-V
 /// TLSDESC is not on by default in Linux, even on RISC-V, and is covered above
@@ -18,7 +19,6 @@
 
 /// Unsupported argument
 // RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
-// RUN: not %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
 
 // DESC:       "-cc1" {{.*}}"-enable-tlsdesc"
 // NODESC-NOT: "-enable-tlsdesc"
diff --git a/clang/test/InstallAPI/basic.test b/clang/test/InstallAPI/basic.test
index 5b41ccd..0969110 100644
--- a/clang/test/InstallAPI/basic.test
+++ b/clang/test/InstallAPI/basic.test
@@ -2,7 +2,8 @@
 // RUN: split-file %s %t
 /// Check basic arguments are captured.
 // RUN: clang-installapi -x objective-c -target arm64-apple-ios13.0.0 \
-// RUN: -fapplication-extension -current_version 1 -install_name /usr/lib/basic.dylib \
+// RUN: -fapplication-extension -current_version 1 -compatibility_version 1 \
+// RUN: -install_name /usr/lib/basic.dylib \
 // RUN: %t/basic_inputs.json -o %t/basic.tbd 2>&1 | FileCheck %s --allow-empty
 // RUN: llvm-readtapi -compare %t/basic.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty
 
@@ -25,11 +26,6 @@
 //--- expected.tbd
 {
   "main_library": {
-    "compatibility_versions": [
-      {
-        "version": "0"
-      }
-    ],
     "install_names": [
       {
         "name": "/usr/lib/basic.dylib"
diff --git a/clang/test/Modules/reduced-bmi-generating-codes.cppm b/clang/test/Modules/reduced-bmi-generating-codes.cppm
new file mode 100644
index 0000000..13dcda0
--- /dev/null
+++ b/clang/test/Modules/reduced-bmi-generating-codes.cppm
@@ -0,0 +1,40 @@
+// Although the reduced BMI are not designed to be generated,
+// it is helpful for testing whether we've reduced the definitions.
+//
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \
+// RUN:     -emit-reduced-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cpp \
+// RUN:     -fmodule-file=a=%t/a.pcm -S -emit-llvm -o - \
+// RUN:     | FileCheck %t/b.cpp
+
+//--- a.cppm
+export module a;
+
+export template <class T>
+class A {
+public:
+    int member() {
+        return 43;
+    }
+};
+
+// Instantiate `A<int>::member()`.
+export int a_member = A<int>().member();
+
+export const int a = 43;
+
+//--- b.cpp
+import a;
+
+static_assert(a == 43);
+
+int b() {
+    A<int> a;
+    return a.member();
+}
+
+// CHECK: define{{.*}}@_ZNW1a1AIiE6memberEv
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index fea24d1..246cbfa 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -61,7 +61,7 @@ int r;
 #ifdef __BPF_FEATURE_ST
 int s;
 #endif
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
 int t;
 #endif
 
diff --git a/clang/test/Preprocessor/ptrauth_feature.c b/clang/test/Preprocessor/ptrauth_feature.c
new file mode 100644
index 0000000..e45c6ea
--- /dev/null
+++ b/clang/test/Preprocessor/ptrauth_feature.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s -E -triple=arm64-- | FileCheck %s --check-prefixes=NOINTRIN
+// RUN: %clang_cc1 %s -E -triple=arm64-- -fptrauth-intrinsics | FileCheck %s --check-prefixes=INTRIN
+
+#if __has_feature(ptrauth_intrinsics)
+// INTRIN: has_ptrauth_intrinsics
+void has_ptrauth_intrinsics() {}
+#else
+// NOINTRIN: no_ptrauth_intrinsics
+void no_ptrauth_intrinsics() {}
+#endif
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index b2cad622..dfc6d18 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -1666,9 +1666,9 @@
 // CHECK-MISALIGNED-AVOID: __riscv_misaligned_avoid 1
 
 // RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i -E -dM %s \
-// RUN:   -munaligned-access -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
+// RUN:   -mno-strict-align -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
 // RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -E -dM %s \
-// RUN:   -munaligned-access -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
+// RUN:   -mno-strict-align -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
 // RUN: %clang --target=riscv64-unknown-linux-gnu -mcpu=sifive-p450 -E -dM %s \
 // RUN:  -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
 // CHECK-MISALIGNED-FAST: __riscv_misaligned_fast 1
diff --git a/clang/test/Sema/PR84368.cpp b/clang/test/Sema/PR84368.cpp
new file mode 100644
index 0000000..6551df2
--- /dev/null
+++ b/clang/test/Sema/PR84368.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -std=c++20 -verify %s
+// RUN: %clang_cc1 -std=c++23 -verify %s
+// expected-no-diagnostics
+
+template<class T> concept IsOk = requires() { typename T::Float; };
+
+template<IsOk T> struct Thing;
+
+template<IsOk T> struct Foobar {
+  template<int> struct Inner {
+    template<IsOk T2> friend struct Thing;
+  };
+};
+
+struct MyType { using Float=float; };
+Foobar<MyType>::Inner<0> foobar;
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index e711121..f094162 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -29,3 +29,12 @@ void test_prefetch(void) {
   __builtin_arm_prefetch(0, 0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
   __builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 }
+
+void test_trap(short s, unsigned short us) {
+  __builtin_arm_trap(42);
+  __builtin_arm_trap(65535);
+  __builtin_arm_trap(-1);
+  __builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
+  __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
+  __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
+}
+\ No newline at end of file
diff --git a/clang/test/Sema/ptrauth-intrinsics-macro.c b/clang/test/Sema/ptrauth-intrinsics-macro.c
new file mode 100644
index 0000000..07d6374
--- /dev/null
+++ b/clang/test/Sema/ptrauth-intrinsics-macro.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -Wall -fsyntax-only -verify -fptrauth-intrinsics %s
+// RUN: %clang_cc1 -triple arm64-apple-ios -Wall -fsyntax-only -verify %s
+
+// expected-no-diagnostics
+
+#include <ptrauth.h>
+
+#define VALID_CODE_KEY 0
+#define VALID_DATA_KEY 2
+
+extern int dv;
+
+void test(int *dp, int value) {
+  dp = ptrauth_strip(dp, VALID_DATA_KEY);
+  ptrauth_extra_data_t t0 = ptrauth_blend_discriminator(dp, value);
+  (void)t0;
+  dp = ptrauth_sign_unauthenticated(dp, VALID_DATA_KEY, 0);
+  dp = ptrauth_auth_and_resign(dp, VALID_DATA_KEY, dp, VALID_DATA_KEY, dp);
+  dp = ptrauth_auth_data(dp, VALID_DATA_KEY, 0);
+  int pu0 = 0, pu1 = 0, pu2 = 0, pu3 = 0, pu4 = 0, pu5 = 0, pu6 = 0, pu7 = 0;
+  ptrauth_blend_discriminator(&pu0, value);
+  ptrauth_auth_and_resign(&pu1, VALID_DATA_KEY, dp, VALID_DATA_KEY, dp);
+  ptrauth_auth_and_resign(dp, VALID_DATA_KEY, &pu2, VALID_DATA_KEY, dp);
+  ptrauth_auth_and_resign(dp, VALID_DATA_KEY, dp, VALID_DATA_KEY, &pu3);
+  ptrauth_sign_generic_data(pu4, dp);
+  ptrauth_sign_generic_data(dp, pu5);
+  ptrauth_auth_data(&pu6, VALID_DATA_KEY, value);
+  ptrauth_auth_data(dp, VALID_DATA_KEY, pu7);
+
+
+
+  int t2 = ptrauth_sign_generic_data(dp, 0);
+  (void)t2;
+}
diff --git a/clang/test/Sema/ptrauth.c b/clang/test/Sema/ptrauth.c
new file mode 100644
index 0000000..3ad3d70
--- /dev/null
+++ b/clang/test/Sema/ptrauth.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -fsyntax-only -verify -fptrauth-intrinsics %s
+
+#if __has_feature(ptrauth_intrinsics)
+#warning Pointer authentication enabled!
+// expected-warning@-1 {{Pointer authentication enabled!}}
+#endif
+
+#if __aarch64__
+#define VALID_CODE_KEY 0
+#define VALID_DATA_KEY 2
+#define INVALID_KEY 200
+#else
+#error Provide these constants if you port this test
+#endif
+
+#define NULL ((void*) 0)
+struct A { int x; } mismatched_type;
+
+extern int dv;
+extern int fv(int);
+
+void test_strip(int *dp, int (*fp)(int)) {
+  __builtin_ptrauth_strip(dp); // expected-error {{too few arguments}}
+  __builtin_ptrauth_strip(dp, VALID_DATA_KEY, dp); // expected-error {{too many arguments}}
+  (void) __builtin_ptrauth_strip(NULL, VALID_DATA_KEY); // no warning
+
+  __builtin_ptrauth_strip(mismatched_type, VALID_DATA_KEY); // expected-error {{signed value must have pointer type; type here is 'struct A'}}
+  __builtin_ptrauth_strip(dp, mismatched_type); // expected-error {{passing 'struct A' to parameter of incompatible type 'int'}}
+
+  int *dr = __builtin_ptrauth_strip(dp, VALID_DATA_KEY);
+  dr = __builtin_ptrauth_strip(dp, INVALID_KEY); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  int (*fr)(int) = __builtin_ptrauth_strip(fp, VALID_CODE_KEY);
+  fr = __builtin_ptrauth_strip(fp, INVALID_KEY); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  float *mismatch = __builtin_ptrauth_strip(dp, VALID_DATA_KEY); // expected-warning {{incompatible pointer types initializing 'float *' with an expression of type 'int *'}}
+}
+
+void test_blend_discriminator(int *dp, int (*fp)(int), int value) {
+  __builtin_ptrauth_blend_discriminator(dp); // expected-error {{too few arguments}}
+  __builtin_ptrauth_blend_discriminator(dp, dp, dp); // expected-error {{too many arguments}}
+  (void) __builtin_ptrauth_blend_discriminator(dp, value); // no warning
+
+  __builtin_ptrauth_blend_discriminator(mismatched_type, value); // expected-error {{blended pointer must have pointer type; type here is 'struct A'}}
+  __builtin_ptrauth_blend_discriminator(dp, mismatched_type); // expected-error {{blended integer must have integer type; type here is 'struct A'}}
+
+  float *mismatch = __builtin_ptrauth_blend_discriminator(dp, value); // expected-error {{incompatible integer to pointer conversion initializing 'float *' with an expression of type}}
+}
+
+void test_sign_unauthenticated(int *dp, int (*fp)(int)) {
+  __builtin_ptrauth_sign_unauthenticated(dp, VALID_DATA_KEY); // expected-error {{too few arguments}}
+  __builtin_ptrauth_sign_unauthenticated(dp, VALID_DATA_KEY, dp, dp); // expected-error {{too many arguments}}
+
+  __builtin_ptrauth_sign_unauthenticated(mismatched_type, VALID_DATA_KEY, 0); // expected-error {{signed value must have pointer type; type here is 'struct A'}}
+  __builtin_ptrauth_sign_unauthenticated(dp, mismatched_type, 0); // expected-error {{passing 'struct A' to parameter of incompatible type 'int'}}
+  __builtin_ptrauth_sign_unauthenticated(dp, VALID_DATA_KEY, mismatched_type); // expected-error {{extra discriminator must have pointer or integer type; type here is 'struct A'}}
+
+  (void) __builtin_ptrauth_sign_unauthenticated(NULL, VALID_DATA_KEY, 0); // expected-warning {{signing a null pointer will yield a non-null pointer}}
+
+  int *dr = __builtin_ptrauth_sign_unauthenticated(dp, VALID_DATA_KEY, 0);
+  dr = __builtin_ptrauth_sign_unauthenticated(dp, INVALID_KEY, 0); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  int (*fr)(int) = __builtin_ptrauth_sign_unauthenticated(fp, VALID_CODE_KEY, 0);
+  fr = __builtin_ptrauth_sign_unauthenticated(fp, INVALID_KEY, 0); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  float *mismatch = __builtin_ptrauth_sign_unauthenticated(dp, VALID_DATA_KEY, 0); // expected-warning {{incompatible pointer types initializing 'float *' with an expression of type 'int *'}}
+}
+
+void test_auth(int *dp, int (*fp)(int)) {
+  __builtin_ptrauth_auth(dp, VALID_DATA_KEY); // expected-error {{too few arguments}}
+  __builtin_ptrauth_auth(dp, VALID_DATA_KEY, dp, dp); // expected-error {{too many arguments}}
+
+  __builtin_ptrauth_auth(mismatched_type, VALID_DATA_KEY, 0); // expected-error {{signed value must have pointer type; type here is 'struct A'}}
+  __builtin_ptrauth_auth(dp, mismatched_type, 0); // expected-error {{passing 'struct A' to parameter of incompatible type 'int'}}
+  __builtin_ptrauth_auth(dp, VALID_DATA_KEY, mismatched_type); // expected-error {{extra discriminator must have pointer or integer type; type here is 'struct A'}}
+
+  (void) __builtin_ptrauth_auth(NULL, VALID_DATA_KEY, 0); // expected-warning {{authenticating a null pointer will almost certainly trap}}
+
+  int *dr = __builtin_ptrauth_auth(dp, VALID_DATA_KEY, 0);
+  dr = __builtin_ptrauth_auth(dp, INVALID_KEY, 0); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  int (*fr)(int) = __builtin_ptrauth_auth(fp, VALID_CODE_KEY, 0);
+  fr = __builtin_ptrauth_auth(fp, INVALID_KEY, 0); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  float *mismatch = __builtin_ptrauth_auth(dp, VALID_DATA_KEY, 0); // expected-warning {{incompatible pointer types initializing 'float *' with an expression of type 'int *'}}
+}
+
+void test_auth_and_resign(int *dp, int (*fp)(int)) {
+  __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, 0, VALID_DATA_KEY); // expected-error {{too few arguments}}
+  __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, dp, VALID_DATA_KEY, dp, 0); // expected-error {{too many arguments}}
+
+  __builtin_ptrauth_auth_and_resign(mismatched_type, VALID_DATA_KEY, 0, VALID_DATA_KEY, dp); // expected-error {{signed value must have pointer type; type here is 'struct A'}}
+  __builtin_ptrauth_auth_and_resign(dp, mismatched_type, 0, VALID_DATA_KEY, dp); // expected-error {{passing 'struct A' to parameter of incompatible type 'int'}}
+  __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, mismatched_type, VALID_DATA_KEY, dp); // expected-error {{extra discriminator must have pointer or integer type; type here is 'struct A'}}
+  __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, 0, mismatched_type, dp); // expected-error {{passing 'struct A' to parameter of incompatible type 'int'}}
+  __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, 0, VALID_DATA_KEY, mismatched_type); // expected-error {{extra discriminator must have pointer or integer type; type here is 'struct A'}}
+
+  (void) __builtin_ptrauth_auth_and_resign(NULL, VALID_DATA_KEY, 0, VALID_DATA_KEY, dp); // expected-warning {{authenticating a null pointer will almost certainly trap}}
+
+  int *dr = __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, 0, VALID_DATA_KEY, dp);
+  dr = __builtin_ptrauth_auth_and_resign(dp, INVALID_KEY, 0, VALID_DATA_KEY, dp); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+  dr = __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, 0, INVALID_KEY, dp); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  int (*fr)(int) = __builtin_ptrauth_auth_and_resign(fp, VALID_CODE_KEY, 0, VALID_CODE_KEY, dp);
+  fr = __builtin_ptrauth_auth_and_resign(fp, INVALID_KEY, 0, VALID_CODE_KEY, dp); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+  fr = __builtin_ptrauth_auth_and_resign(fp, VALID_CODE_KEY, 0, INVALID_KEY, dp); // expected-error {{does not identify a valid pointer authentication key for the current target}}
+
+  float *mismatch = __builtin_ptrauth_auth_and_resign(dp, VALID_DATA_KEY, 0, VALID_DATA_KEY, dp); // expected-warning {{incompatible pointer types initializing 'float *' with an expression of type 'int *'}}
+}
+
+void test_sign_generic_data(int *dp) {
+  __builtin_ptrauth_sign_generic_data(dp); // expected-error {{too few arguments}}
+  __builtin_ptrauth_sign_generic_data(dp, 0, 0); // expected-error {{too many arguments}}
+
+  __builtin_ptrauth_sign_generic_data(mismatched_type, 0); // expected-error {{signed value must have pointer or integer type; type here is 'struct A'}}
+  __builtin_ptrauth_sign_generic_data(dp, mismatched_type); // expected-error {{extra discriminator must have pointer or integer type; type here is 'struct A'}}
+
+  (void) __builtin_ptrauth_sign_generic_data(NULL, 0); // no warning
+
+  unsigned long dr = __builtin_ptrauth_sign_generic_data(dp, 0);
+  dr = __builtin_ptrauth_sign_generic_data(dp, &dv);
+  dr = __builtin_ptrauth_sign_generic_data(12314, 0);
+  dr = __builtin_ptrauth_sign_generic_data(12314, &dv);
+
+  int *mismatch = __builtin_ptrauth_sign_generic_data(dp, 0); // expected-error {{incompatible integer to pointer conversion initializing 'int *' with an expression of type}}
+}
diff --git a/clang/test/SemaCXX/decomposed-condition.cpp b/clang/test/SemaCXX/decomposed-condition.cpp
index ab011f6..e55bbee 100644
--- a/clang/test/SemaCXX/decomposed-condition.cpp
+++ b/clang/test/SemaCXX/decomposed-condition.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++1z -Wno-binding-in-condition -verify %s
+// RUN: %clang_cc1 -std=c++1z -Wno-binding-in-condition -verify %s -fexperimental-new-constant-interpreter
 
 struct X {
   bool flag;
diff --git a/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl
index 06dbdf0..f82b594 100644
--- a/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl
@@ -13,7 +13,7 @@ float2 test_too_many_arg(float2 p0) {
 
 float builtin_bool_to_float_type_promotion(bool p1) {
   return __builtin_hlsl_elementwise_frac(p1);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}}
+  // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
 }
 
 float builtin_frac_int_to_float_promotion(int p1) {
@@ -25,3 +25,15 @@ float2 builtin_frac_int2_to_float2_promotion(int2 p1) {
   return __builtin_hlsl_elementwise_frac(p1);
   // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
 }
+
+// builtins are variadic functions and so are subject to DefaultVariadicArgumentPromotion
+half builtin_frac_half_scalar (half p0) {
+  return __builtin_hlsl_elementwise_frac (p0);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
+
+float builtin_frac_float_scalar ( float p0) {
+  return __builtin_hlsl_elementwise_frac (p0);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
+
diff --git a/clang/test/SemaHLSL/BuiltIns/isinf-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/isinf-errors.hlsl
new file mode 100644
index 0000000..7ddfd56
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/isinf-errors.hlsl
@@ -0,0 +1,38 @@
+
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+
+bool test_too_few_arg() {
+  return __builtin_hlsl_elementwise_isinf();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+bool2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_isinf(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1) {
+  return __builtin_hlsl_elementwise_isinf(p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_isinf_int_to_float_promotion(int p1) {
+  return __builtin_hlsl_elementwise_isinf(p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_isinf_int2_to_float2_promotion(int2 p1) {
+  return __builtin_hlsl_elementwise_isinf(p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
+
+// builtins are variadic functions and so are subject to DefaultVariadicArgumentPromotion
+half builtin_isinf_half_scalar (half p0) {
+  return __builtin_hlsl_elementwise_isinf (p0);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
+
+float builtin_isinf_float_scalar ( float p0) {
+  return __builtin_hlsl_elementwise_isinf (p0);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl
index f6ce87e..83751f6 100644
--- a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl
@@ -92,5 +92,18 @@ float builtin_lerp_int_to_float_promotion(float p0, int p1) {
 
 float4 test_lerp_int4(int4 p0, int4 p1, int4 p2) {
   return __builtin_hlsl_lerp(p0, p1, p2);
-   // expected-error@-1 {{1st argument must be a floating point type (was 'int4' (aka 'vector<int, 4>'))}}
-}
-\ No newline at end of file
+  // expected-error@-1 {{1st argument must be a floating point type (was 'int4' (aka 'vector<int, 4>'))}}
+}
+
+// note: DefaultVariadicArgumentPromotion --> DefaultArgumentPromotion has already promoted to double
+// we don't know anymore that the input was half when __builtin_hlsl_lerp is called so we default to float
+// for expected type
+half builtin_lerp_half_scalar (half p0) {
+  return __builtin_hlsl_lerp ( p0, p0, p0 );
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
+
+float builtin_lerp_float_scalar ( float p0) {
+  return __builtin_hlsl_lerp ( p0, p0, p0 );
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/rcp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/rcp-errors.hlsl
index dc4501d..fa6fd81 100644
--- a/clang/test/SemaHLSL/BuiltIns/rcp-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/rcp-errors.hlsl
@@ -13,7 +13,7 @@ float2 test_too_many_arg(float2 p0) {
 
 float builtin_bool_to_float_type_promotion(bool p1) {
   return __builtin_hlsl_elementwise_rcp(p1);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}}
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
 }
 
 float builtin_rcp_int_to_float_promotion(int p1) {
diff --git a/clang/test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl
new file mode 100644
index 0000000..c027a69
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl
@@ -0,0 +1,38 @@
+
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+
+float test_too_few_arg() {
+  return __builtin_hlsl_elementwise_rsqrt();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_rsqrt(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float builtin_bool_to_float_type_promotion(bool p1) {
+  return __builtin_hlsl_elementwise_rsqrt(p1);
+  // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+float builtin_rsqrt_int_to_float_promotion(int p1) {
+  return __builtin_hlsl_elementwise_rsqrt(p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+float2 builtin_rsqrt_int2_to_float2_promotion(int2 p1) {
+  return __builtin_hlsl_elementwise_rsqrt(p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
+
+// builtins are variadic functions and so are subject to DefaultVariadicArgumentPromotion
+half builtin_rsqrt_half_scalar (half p0) {
+  return __builtin_hlsl_elementwise_rsqrt (p0);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
+
+float builtin_rsqrt_float_scalar ( float p0) {
+  return __builtin_hlsl_elementwise_rsqrt (p0);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp
index 701ab81..70cb80f 100644
--- a/clang/tools/clang-installapi/Options.cpp
+++ b/clang/tools/clang-installapi/Options.cpp
@@ -85,6 +85,9 @@ bool Options::processLinkerOptions(InputArgList &Args) {
   if (auto *Arg = Args.getLastArg(OPT_current__version))
     LinkerOpts.CurrentVersion.parse64(Arg->getValue());
 
+  if (auto *Arg = Args.getLastArg(OPT_compatibility__version))
+    LinkerOpts.CompatVersion.parse64(Arg->getValue());
+
   LinkerOpts.IsDylib = Args.hasArg(OPT_dynamiclib);
 
   LinkerOpts.AppExtensionSafe =
@@ -159,6 +162,7 @@ InstallAPIContext Options::createContext() {
 
   Ctx.BA.InstallName = LinkerOpts.InstallName;
   Ctx.BA.CurrentVersion = LinkerOpts.CurrentVersion;
+  Ctx.BA.CompatVersion = LinkerOpts.CompatVersion;
   Ctx.BA.AppExtensionSafe = LinkerOpts.AppExtensionSafe;
   Ctx.FT = DriverOpts.OutFT;
   Ctx.OutputLoc = DriverOpts.OutputPath;
diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h
index 06f79b6..e218d57 100644
--- a/clang/tools/clang-installapi/Options.h
+++ b/clang/tools/clang-installapi/Options.h
@@ -49,6 +49,9 @@ struct LinkerOptions {
   /// \brief The current version to use for the dynamic library.
   PackedVersion CurrentVersion;
 
+  /// \brief The compatibility version to use for the dynamic library.
+  PackedVersion CompatVersion;
+
   /// \brief Is application extension safe.
   bool AppExtensionSafe = false;
 
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 535ef42..c60be27 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -562,6 +562,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
   case Triple::aarch64_be:
   case Triple::ppc64:
   case Triple::ppc64le:
+  case Triple::systemz:
     return generic::clang(InputFiles, Args);
   default:
     return createStringError(inconvertibleErrorCode(),
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index fcba0e6..21c18a0 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1776,9 +1776,22 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) {
   auto Style = getLLVMStyle();
   Style.TypeNames.push_back("MyType");
   Tokens = annotate("int iso_time(MyType);", Style);
+  ASSERT_TRUE(IsCpp);
   ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[3], tok::identifier, TT_TypeName);
+
+  Style.Language = FormatStyle::LK_CSharp;
+  Tokens = annotate("int iso_time(time_t);", Style);
+  ASSERT_FALSE(IsCpp);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
+
+  Style.Language = FormatStyle::LK_ObjC;
+  Tokens = annotate("int iso_time(time_t);", Style);
+  ASSERT_TRUE(IsCpp);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsCtorAndDtorDeclNames) {
diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index 3cdf10c..a2fc27d 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -467,7 +467,7 @@ void __msan_init() {
   __msan_clear_on_return();
   if (__msan_get_track_origins())
     VPrintf(1, "msan_track_origins\n");
-  if (!InitShadow(__msan_get_track_origins())) {
+  if (!InitShadowWithReExec(__msan_get_track_origins())) {
     Printf("FATAL: MemorySanitizer can not mmap the shadow memory.\n");
     Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
     Printf("FATAL: Disabling ASLR is known to cause this error.\n");
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 710447a..7fb58be 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -33,12 +33,18 @@ struct MappingDesc {
   uptr start;
   uptr end;
   enum Type {
-    INVALID, APP, SHADOW, ORIGIN
+    INVALID = 1,
+    ALLOCATOR = 2,
+    APP = 4,
+    SHADOW = 8,
+    ORIGIN = 16,
   } type;
   const char *name;
 };
 
-
+// Note: MappingDesc::ALLOCATOR entries are only used to check for memory
+// layout compatibility. The actual allocation settings are in
+// msan_allocator.cpp, which need to be kept in sync.
 #if SANITIZER_LINUX && defined(__mips64)
 
 // MIPS64 maps:
@@ -84,7 +90,8 @@ const MappingDesc kMemoryLayout[] = {
     {0X0B00000000000, 0X0C00000000000, MappingDesc::SHADOW, "shadow-10-13"},
     {0X0C00000000000, 0X0D00000000000, MappingDesc::INVALID, "invalid"},
     {0X0D00000000000, 0X0E00000000000, MappingDesc::ORIGIN, "origin-10-13"},
-    {0X0E00000000000, 0X1000000000000, MappingDesc::APP, "app-15"},
+    {0x0E00000000000, 0x0E40000000000, MappingDesc::ALLOCATOR, "allocator"},
+    {0X0E40000000000, 0X1000000000000, MappingDesc::APP, "app-15"},
 };
 # define MEM_TO_SHADOW(mem) ((uptr)mem ^ 0xB00000000000ULL)
 # define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x200000000000ULL)
@@ -106,7 +113,8 @@ const MappingDesc kMemoryLayout[] = {
     {0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"},
     {0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"},
     {0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"},
-    {0x700000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
+    {0x700000000000ULL, 0x740000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
+    {0x740000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
 #  define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL)
 #  define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x100000000000ULL)
 
@@ -118,7 +126,8 @@ const MappingDesc kMemoryLayout[] = {
     {0x180200000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"},
     {0x1C0000000000ULL, 0x2C0200000000ULL, MappingDesc::ORIGIN, "origin"},
     {0x2C0200000000ULL, 0x300000000000ULL, MappingDesc::INVALID, "invalid"},
-    {0x300000000000ULL, 0x800000000000ULL, MappingDesc::APP, "high memory"}};
+    {0x300000000000ULL, 0x320000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
+    {0x320000000000ULL, 0x800000000000ULL, MappingDesc::APP, "high memory"}};
 
 // Various kernels use different low end ranges but we can combine them into one
 // big range. They also use different high end ranges but we can map them all to
@@ -141,7 +150,8 @@ const MappingDesc kMemoryLayout[] = {
     {0x180000000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"},
     {0x1C0000000000ULL, 0x2C0000000000ULL, MappingDesc::ORIGIN, "origin"},
     {0x2C0000000000ULL, 0x440000000000ULL, MappingDesc::INVALID, "invalid"},
-    {0x440000000000ULL, 0x500000000000ULL, MappingDesc::APP, "high memory"}};
+    {0x440000000000ULL, 0x460000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
+    {0x460000000000ULL, 0x500000000000ULL, MappingDesc::APP, "high memory"}};
 
 #define MEM_TO_SHADOW(mem) \
   ((((uptr)(mem)) & ~0xC00000000000ULL) + 0x080000000000ULL)
@@ -208,7 +218,8 @@ const MappingDesc kMemoryLayout[] = {
     {0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"},
     {0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"},
     {0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"},
-    {0x700000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
+    {0x700000000000ULL, 0x740000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
+    {0x740000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
 #define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL)
 #define SHADOW_TO_ORIGIN(mem) (((uptr)(mem)) + 0x100000000000ULL)
 
@@ -223,20 +234,22 @@ const uptr kMemoryLayoutSize = sizeof(kMemoryLayout) / sizeof(kMemoryLayout[0]);
 #ifndef __clang__
 __attribute__((optimize("unroll-loops")))
 #endif
-inline bool addr_is_type(uptr addr, MappingDesc::Type mapping_type) {
+inline bool
+addr_is_type(uptr addr, int mapping_types) {
 // It is critical for performance that this loop is unrolled (because then it is
 // simplified into just a few constant comparisons).
 #ifdef __clang__
 #pragma unroll
 #endif
   for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
-    if (kMemoryLayout[i].type == mapping_type &&
+    if ((kMemoryLayout[i].type & mapping_types) &&
         addr >= kMemoryLayout[i].start && addr < kMemoryLayout[i].end)
       return true;
   return false;
 }
 
-#define MEM_IS_APP(mem) addr_is_type((uptr)(mem), MappingDesc::APP)
+#define MEM_IS_APP(mem) \
+  (addr_is_type((uptr)(mem), MappingDesc::APP | MappingDesc::ALLOCATOR))
 #define MEM_IS_SHADOW(mem) addr_is_type((uptr)(mem), MappingDesc::SHADOW)
 #define MEM_IS_ORIGIN(mem) addr_is_type((uptr)(mem), MappingDesc::ORIGIN)
 
@@ -250,7 +263,7 @@ extern bool msan_init_is_running;
 extern int msan_report_count;
 
 bool ProtectRange(uptr beg, uptr end);
-bool InitShadow(bool init_origins);
+bool InitShadowWithReExec(bool init_origins);
 char *GetProcSelfMaps();
 void InitializeInterceptors();
 
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 0b2dd2b..b1bc5b9 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -48,6 +48,9 @@ struct MsanMapUnmapCallback {
   }
 };
 
+// Note: to ensure that the allocator is compatible with the application memory
+// layout (especially with high-entropy ASLR), kSpaceBeg and kSpaceSize must be
+// duplicated as MappingDesc::ALLOCATOR in msan.h.
 #if defined(__mips64)
 static const uptr kMaxAllowedMallocSize = 2UL << 30;
 
diff --git a/compiler-rt/lib/msan/msan_linux.cpp b/compiler-rt/lib/msan/msan_linux.cpp
index c7ecb7c..cd2d9f5 100644
--- a/compiler-rt/lib/msan/msan_linux.cpp
+++ b/compiler-rt/lib/msan/msan_linux.cpp
@@ -20,6 +20,9 @@
 #  include <signal.h>
 #  include <stdio.h>
 #  include <stdlib.h>
+#  if SANITIZER_LINUX
+#    include <sys/personality.h>
+#  endif
 #  include <sys/resource.h>
 #  include <sys/time.h>
 #  include <unistd.h>
@@ -43,11 +46,13 @@ void ReportMapRange(const char *descr, uptr beg, uptr size) {
   }
 }
 
-static bool CheckMemoryRangeAvailability(uptr beg, uptr size) {
+static bool CheckMemoryRangeAvailability(uptr beg, uptr size, bool verbose) {
   if (size > 0) {
     uptr end = beg + size - 1;
     if (!MemoryRangeIsAvailable(beg, end)) {
-      Printf("FATAL: Memory range 0x%zx - 0x%zx is not available.\n", beg, end);
+      if (verbose)
+        Printf("FATAL: Memory range 0x%zx - 0x%zx is not available.\n", beg,
+               end);
       return false;
     }
   }
@@ -86,7 +91,7 @@ static void CheckMemoryLayoutSanity() {
     CHECK(addr_is_type(start, type));
     CHECK(addr_is_type((start + end) / 2, type));
     CHECK(addr_is_type(end - 1, type));
-    if (type == MappingDesc::APP) {
+    if (type == MappingDesc::APP || type == MappingDesc::ALLOCATOR) {
       uptr addr = start;
       CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));
       CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));
@@ -106,7 +111,7 @@ static void CheckMemoryLayoutSanity() {
   }
 }
 
-bool InitShadow(bool init_origins) {
+static bool InitShadow(bool init_origins, bool dry_run) {
   // Let user know mapping parameters first.
   VPrintf(1, "__msan_init %p\n", reinterpret_cast<void *>(&__msan_init));
   for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
@@ -116,8 +121,9 @@ bool InitShadow(bool init_origins) {
   CheckMemoryLayoutSanity();
 
   if (!MEM_IS_APP(&__msan_init)) {
-    Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
-           reinterpret_cast<void *>(&__msan_init));
+    if (!dry_run)
+      Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
+             reinterpret_cast<void *>(&__msan_init));
     return false;
   }
 
@@ -138,20 +144,26 @@ bool InitShadow(bool init_origins) {
     bool protect = type == MappingDesc::INVALID ||
                    (!init_origins && type == MappingDesc::ORIGIN);
     CHECK(!(map && protect));
-    if (!map && !protect)
-      CHECK(type == MappingDesc::APP);
+    if (!map && !protect) {
+      CHECK(type == MappingDesc::APP || type == MappingDesc::ALLOCATOR);
+
+      if (dry_run && type == MappingDesc::ALLOCATOR &&
+          !CheckMemoryRangeAvailability(start, size, !dry_run))
+        return false;
+    }
     if (map) {
-      if (!CheckMemoryRangeAvailability(start, size))
+      if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))
         return false;
-      if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
+      if (!dry_run &&
+          !MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
         return false;
-      if (common_flags()->use_madv_dontdump)
+      if (!dry_run && common_flags()->use_madv_dontdump)
         DontDumpShadowMemory(start, size);
     }
     if (protect) {
-      if (!CheckMemoryRangeAvailability(start, size))
+      if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))
         return false;
-      if (!ProtectMemoryRange(start, size, kMemoryLayout[i].name))
+      if (!dry_run && !ProtectMemoryRange(start, size, kMemoryLayout[i].name))
         return false;
     }
   }
@@ -159,6 +171,35 @@ bool InitShadow(bool init_origins) {
   return true;
 }
 
+bool InitShadowWithReExec(bool init_origins) {
+  // Start with dry run: check layout is ok, but don't print warnings because
+  // warning messages will cause tests to fail (even if we successfully re-exec
+  // after the warning).
+  bool success = InitShadow(__msan_get_track_origins(), true);
+  if (!success) {
+#  if SANITIZER_LINUX
+    // Perhaps ASLR entropy is too high. If ASLR is enabled, re-exec without it.
+    int old_personality = personality(0xffffffff);
+    bool aslr_on =
+        (old_personality != -1) && ((old_personality & ADDR_NO_RANDOMIZE) == 0);
+
+    if (aslr_on) {
+      VReport(1,
+              "WARNING: MemorySanitizer: memory layout is incompatible, "
+              "possibly due to high-entropy ASLR.\n"
+              "Re-execing with fixed virtual address space.\n"
+              "N.B. reducing ASLR entropy is preferable.\n");
+      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
+      ReExec();
+    }
+#  endif
+  }
+
+  // The earlier dry run didn't actually map or protect anything. Run again in
+  // non-dry run mode.
+  return success && InitShadow(__msan_get_track_origins(), false);
+}
+
 static void MsanAtExit(void) {
   if (flags()->print_stats && (flags()->atexit || msan_report_count > 0))
     ReportStats();
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp
index 3605d0d..ef1fc35 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp
@@ -104,24 +104,7 @@ static void setlim(int res, rlim_t lim) {
 
 void DisableCoreDumperIfNecessary() {
   if (common_flags()->disable_coredump) {
-    rlimit rlim;
-    CHECK_EQ(0, getrlimit(RLIMIT_CORE, &rlim));
-    // On Linux, if the kernel.core_pattern sysctl starts with a '|' (i.e. it
-    // is being piped to a coredump handler such as systemd-coredumpd), the
-    // kernel ignores RLIMIT_CORE (since we aren't creating a file in the file
-    // system) except for the magic value of 1, which disables coredumps when
-    // piping. 1 byte is too small for any kind of valid core dump, so it
-    // also disables coredumps if kernel.core_pattern creates files directly.
-    // While most piped coredump handlers do respect the crashing processes'
-    // RLIMIT_CORE, this is notable not the case for Debian's systemd-coredump
-    // due to a local patch that changes sysctl.d/50-coredump.conf to ignore
-    // the specified limit and instead use RLIM_INFINITY.
-    //
-    // The alternative to using RLIMIT_CORE=1 would be to use prctl() with the
-    // PR_SET_DUMPABLE flag, however that also prevents ptrace(), so makes it
-    // impossible to attach a debugger.
-    rlim.rlim_cur = Min<rlim_t>(SANITIZER_LINUX ? 1 : 0, rlim.rlim_max);
-    CHECK_EQ(0, setrlimit(RLIMIT_CORE, &rlim));
+    setlim(RLIMIT_CORE, 0);
   }
 }
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 8ffc703..2bebe65 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -126,6 +126,7 @@ const int SIGFPE = 8;
 const int SIGSEGV = 11;
 const int SIGPIPE = 13;
 const int SIGTERM = 15;
+const int SIGPROF = 27;
 #if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_NETBSD
 const int SIGBUS = 10;
 const int SIGSYS = 12;
@@ -2168,7 +2169,8 @@ static bool is_sync_signal(ThreadSignalContext *sctx, int sig,
     return false;
 #endif
   return sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || sig == SIGTRAP ||
-         sig == SIGABRT || sig == SIGFPE || sig == SIGPIPE || sig == SIGSYS;
+         sig == SIGABRT || sig == SIGFPE || sig == SIGPIPE || sig == SIGSYS ||
+         sig == SIGPROF;
 }
 
 void sighandler(int sig, __sanitizer_siginfo *info, void *ctx) {
diff --git a/compiler-rt/test/asan/TestCases/Windows/bitfield_uaf.cpp b/compiler-rt/test/asan/TestCases/Windows/bitfield_uaf.cpp
index 8e1a2ae..12ed505 100644
--- a/compiler-rt/test/asan/TestCases/Windows/bitfield_uaf.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/bitfield_uaf.cpp
@@ -21,13 +21,13 @@ void make_access(S *s) {
 int main(void) {
   S *s = (S*)malloc(sizeof(S));
   free(s);
-// CHECK: [[ADDR]] is located 0 bytes inside of 4-byte region
-// CHECK-LABEL: freed by thread T0 here:
-// CHECK:   {{#0 .* free }}
-// CHECK:   {{#1 .* main .*bitfield_uaf.cpp}}:[[@LINE-4]]
-// CHECK-LABEL: previously allocated by thread T0 here:
-// CHECK:   {{#0 .* malloc }}
-// CHECK:   {{#1 .* main .*bitfield_uaf.cpp}}:[[@LINE-8]]
+  // CHECK: [[ADDR]] is located 0 bytes inside of 4-byte region
+  // CHECK-LABEL: freed by thread T0 here:
+  // CHECK:   {{#0 .* free }}
+  // CHECK:   {{ #[1-2] .* main .*bitfield_uaf.cpp}}:[[@LINE-4]]
+  // CHECK-LABEL: previously allocated by thread T0 here:
+  // CHECK:   {{#0 .* malloc }}
+  // CHECK:   {{ #[1-2] .* main .*bitfield_uaf.cpp}}:[[@LINE-8]]
   make_access(s);
   return 0;
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/calloc_left_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/calloc_left_oob.cpp
index b22c359..e96fb61 100644
--- a/compiler-rt/test/asan/TestCases/Windows/calloc_left_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/calloc_left_oob.cpp
@@ -6,12 +6,12 @@
 int main() {
   int *buffer = (int*)calloc(42, sizeof(int));
   buffer[-1] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 4 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*calloc_left_oob.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 4 bytes before 168-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* calloc }}
-// CHECK-NEXT: {{#1 .* main .*calloc_left_oob.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 4 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*calloc_left_oob.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 4 bytes before 168-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK: {{#0 .* calloc }}
+  // CHECK: {{ #[1-2] .* main .*calloc_left_oob.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/calloc_right_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/calloc_right_oob.cpp
index 9e12f9c..fe0fc20 100644
--- a/compiler-rt/test/asan/TestCases/Windows/calloc_right_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/calloc_right_oob.cpp
@@ -6,12 +6,12 @@
 int main() {
   int *buffer = (int*)calloc(42, sizeof(int));
   buffer[42] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 4 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*calloc_right_oob.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes after 168-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* calloc }}
-// CHECK-NEXT: {{#1 .* main .*calloc_right_oob.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 4 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*calloc_right_oob.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes after 168-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* calloc }}
+  // CHECK: {{ #[1-2] .* main .*calloc_right_oob.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/calloc_uaf.cpp b/compiler-rt/test/asan/TestCases/Windows/calloc_uaf.cpp
index 6c225d4..bf13f7d 100644
--- a/compiler-rt/test/asan/TestCases/Windows/calloc_uaf.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/calloc_uaf.cpp
@@ -7,14 +7,14 @@ int main() {
   int *buffer = (int*)calloc(42, sizeof(int));
   free(buffer);
   buffer[0] = 42;
-// CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 4 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*calloc_uaf.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes inside of 168-byte region
-// CHECK: freed by thread T0 here:
-// CHECK-NEXT: {{#0 .* free }}
-// CHECK-NEXT: {{#1 .* main .*calloc_uaf.cpp}}:[[@LINE-8]]
-// CHECK: previously allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* calloc }}
-// CHECK-NEXT: {{#1 .* main .*calloc_uaf.cpp}}:[[@LINE-12]]
+  // CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 4 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*calloc_uaf.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes inside of 168-byte region
+  // CHECK: freed by thread T0 here:
+  // CHECK-NEXT: {{#0 .* free }}
+  // CHECK: {{ #[1-2] .* main .*calloc_uaf.cpp}}:[[@LINE-8]]
+  // CHECK: previously allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* calloc }}
+  // CHECK: {{ #[1-2] .* main .*calloc_uaf.cpp}}:[[@LINE-12]]
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/dll_heap_allocation.cpp b/compiler-rt/test/asan/TestCases/Windows/dll_heap_allocation.cpp
index b8c2c1a..3606b1c 100644
--- a/compiler-rt/test/asan/TestCases/Windows/dll_heap_allocation.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/dll_heap_allocation.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cl %LD %s %Fe%t.dll -DHEAP_LIBRARY %MD \
 // RUN:   %if target={{.*-windows-gnu}} %{ -Wl,--out-implib,%t.lib %}
-// RUN: %clang_cl %s %t.lib %Fe%t -fsanitize=address %MT
+// RUN: %clang_cl_asan %s %t.lib %Fe%t
 // RUN: %run %t 2>&1 | FileCheck %s
 
 // Check that ASan does not fail when releasing allocations that occurred within
diff --git a/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp b/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp
index 0757af9..85b7967 100644
--- a/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp
@@ -5,48 +5,6 @@
 // Just make sure we can compile this.
 // The actual compile&run sequence is to be done by the DLL tests.
 // RUN: %clang_cl_asan -Od %s -Fe%t
-//
-// Get the list of ASan wrappers exported by the main module RTL:
-// note: The mangling decoration (i.e. @4 )is removed because calling convention
-//       differ from 32-bit and 64-bit.
-// RUN: dumpbin /EXPORTS %t | grep -o "__asan_wrap[^ ]*" | sed -e s/@.*// > %t.exports1
-//
-// The exception handlers differ in 32-bit and 64-bit, so we ignore them:
-// RUN: grep '[E]XPORT:' %s | sed -e 's/.*[E]XPORT: //' > %t.exports2
-// EXPORT: __asan_wrap__except_handler3
-// EXPORT: __asan_wrap__except_handler4
-// EXPORT: __asan_wrap___C_specific_handler
-//
-// Get the list of ASan wrappers imported by the DLL RTL:
-// [BEWARE: be really careful with the sed commands, as this test can be run
-//  from different environments with different shells and seds]
-// RUN: grep INTERCEPT_LIBRARY_FUNCTION %p/../../../../lib/asan/asan_win_dll_thunk.cpp \
-// RUN:  | grep -v define | sed -e s/.*(/__asan_wrap_/ -e s/).*//              \
-// RUN:  > %t.imports1
-//
-// Add functions interecepted in asan_malloc.win.cpp and asan_win.cpp.
-// RUN: grep '[I]MPORT:' %s | sed -e 's/.*[I]MPORT: //' > %t.imports2
-// IMPORT: __asan_wrap_HeapAlloc
-// IMPORT: __asan_wrap_HeapFree
-// IMPORT: __asan_wrap_HeapReAlloc
-// IMPORT: __asan_wrap_HeapSize
-// IMPORT: __asan_wrap_CreateThread
-// IMPORT: __asan_wrap_RaiseException
-// IMPORT: __asan_wrap_RtlRaiseException
-// IMPORT: __asan_wrap_SetUnhandledExceptionFilter
-// IMPORT: __asan_wrap_RtlSizeHeap
-// IMPORT: __asan_wrap_RtlAllocateHeap
-// IMPORT: __asan_wrap_RtlReAllocateHeap
-// IMPORT: __asan_wrap_RtlFreeHeap
-//
-// RUN: cat %t.imports1 %t.imports2 | sort | uniq > %t.imports-sorted
-// RUN: cat %t.exports1 %t.exports2 | sort | uniq > %t.exports-sorted
-//
-// Now make sure the DLL thunk imports everything:
-// RUN: echo
-// RUN: echo "=== NOTE === If you see a mismatch below, please update asan_win_dll_thunk.cpp"
-// RUN: diff %t.imports-sorted %t.exports-sorted
-// REQUIRES: asan-static-runtime
 
 #include <stdio.h>
 #include <windows.h>
diff --git a/compiler-rt/test/asan/TestCases/Windows/dll_malloc_left_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/dll_malloc_left_oob.cpp
index 6d550eb..5ca48fb 100644
--- a/compiler-rt/test/asan/TestCases/Windows/dll_malloc_left_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/dll_malloc_left_oob.cpp
@@ -7,17 +7,17 @@ extern "C" __declspec(dllexport)
 int test_function() {
   char *buffer = (char*)malloc(42);
   buffer[-1] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: test_function {{.*}}dll_malloc_left_oob.cpp:[[@LINE-3]]
-// CHECK-NEXT: main {{.*}}dll_host.cpp
-//
-// CHECK: [[ADDR]] is located 1 bytes before 42-byte region
-// CHECK-LABEL: allocated by thread T0 here:
-// CHECK-NEXT:   malloc
-// CHECK-NEXT:   test_function {{.*}}dll_malloc_left_oob.cpp:[[@LINE-10]]
-// CHECK-NEXT:   main {{.*}}dll_host.cpp
-// CHECK-LABEL: SUMMARY
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: test_function {{.*}}dll_malloc_left_oob.cpp:[[@LINE-3]]
+  // CHECK-NEXT: main {{.*}}dll_host.cpp
+  //
+  // CHECK: [[ADDR]] is located 1 bytes before 42-byte region
+  // CHECK-LABEL: allocated by thread T0 here:
+  // CHECK-NEXT:   malloc
+  // CHECK:   test_function {{.*}}dll_malloc_left_oob.cpp:[[@LINE-10]]
+  // CHECK-NEXT:   main {{.*}}dll_host.cpp
+  // CHECK-LABEL: SUMMARY
   free(buffer);
   return 0;
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/dll_malloc_uaf.cpp b/compiler-rt/test/asan/TestCases/Windows/dll_malloc_uaf.cpp
index bc701e9..ae23f86 100644
--- a/compiler-rt/test/asan/TestCases/Windows/dll_malloc_uaf.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/dll_malloc_uaf.cpp
@@ -9,20 +9,20 @@ int test_function() {
   int *buffer = (int*)malloc(42);
   free(buffer);
   buffer[0] = 42;
-// CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 4 at [[ADDR]] thread T0
-// CHECK-NEXT:  test_function {{.*}}dll_malloc_uaf.cpp:[[@LINE-3]]
-// CHECK-NEXT:  main {{.*}}dll_host
-//
-// CHECK: [[ADDR]] is located 0 bytes inside of 42-byte region
-// CHECK-LABEL: freed by thread T0 here:
-// CHECK-NEXT:  free
-// CHECK-NEXT:  test_function {{.*}}dll_malloc_uaf.cpp:[[@LINE-10]]
-// CHECK-NEXT:  main {{.*}}dll_host
-//
-// CHECK-LABEL: previously allocated by thread T0 here:
-// CHECK-NEXT:  malloc
-// CHECK-NEXT:  test_function {{.*}}dll_malloc_uaf.cpp:[[@LINE-16]]
-// CHECK-NEXT:  main {{.*}}dll_host
+  // CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 4 at [[ADDR]] thread T0
+  // CHECK-NEXT:  test_function {{.*}}dll_malloc_uaf.cpp:[[@LINE-3]]
+  // CHECK-NEXT:  main {{.*}}dll_host
+  //
+  // CHECK: [[ADDR]] is located 0 bytes inside of 42-byte region
+  // CHECK-LABEL: freed by thread T0 here:
+  // CHECK-NEXT:  free
+  // CHECK:  test_function {{.*}}dll_malloc_uaf.cpp:[[@LINE-10]]
+  // CHECK-NEXT:  main {{.*}}dll_host
+  //
+  // CHECK-LABEL: previously allocated by thread T0 here:
+  // CHECK-NEXT:  malloc
+  // CHECK:  test_function {{.*}}dll_malloc_uaf.cpp:[[@LINE-16]]
+  // CHECK-NEXT:  main {{.*}}dll_host
   return 0;
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/double_free.cpp b/compiler-rt/test/asan/TestCases/Windows/double_free.cpp
index 45568e5..e288b40 100644
--- a/compiler-rt/test/asan/TestCases/Windows/double_free.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/double_free.cpp
@@ -7,15 +7,15 @@ int main() {
   int *x = (int*)malloc(42 * sizeof(int));
   free(x);
   free(x);
-// CHECK: AddressSanitizer: attempting double-free on [[ADDR:0x[0-9a-f]+]]
-// CHECK-NEXT: {{#0 .* free }}
-// CHECK-NEXT: {{#1 .* main .*double_free.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes inside of 168-byte region
-// CHECK-LABEL: freed by thread T0 here:
-// CHECK-NEXT: {{#0 .* free }}
-// CHECK-NEXT: {{#1 .* main .*double_free.cpp}}:[[@LINE-8]]
-// CHECK-LABEL: previously allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* malloc }}
-// CHECK-NEXT: {{#1 .* main .*double_free.cpp}}:[[@LINE-12]]
+  // CHECK: AddressSanitizer: attempting double-free on [[ADDR:0x[0-9a-f]+]]
+  // CHECK-NEXT: {{#0 .* free }}
+  // CHECK: {{ #[1-2] .* main .*double_free.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes inside of 168-byte region
+  // CHECK-LABEL: freed by thread T0 here:
+  // CHECK-NEXT: {{#0 .* free }}
+  // CHECK: {{ #[1-2] .* main .*double_free.cpp}}:[[@LINE-8]]
+  // CHECK-LABEL: previously allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* malloc }}
+  // CHECK: {{ #[1-2] .* main .*double_free.cpp}}:[[@LINE-12]]
   return 0;
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp b/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp
deleted file mode 100644
index 1803911..0000000
--- a/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// UNSUPPORTED: target={{.*-windows-gnu}}
-
-// Check that the interface exported by asan static lib matches the list of
-// functions mentioned in sanitizer_interface.inc.
-//
-// Just make sure we can compile this.
-// RUN: %clang_cl_asan -Od %s -Fe%t
-//
-// note: The mangling decoration (i.e. @4 )is removed because calling convention
-//       differ from 32-bit and 64-bit.
-//
-// RUN: dumpbin /EXPORTS %t | sed "s/=.*//"                                    \
-// RUN:   | grep -o "\(__asan_\|__ubsan_\|__sanitizer_\|__sancov_\)[^ ]*"      \
-// RUN:   | grep -v "__asan_wrap"                                              \
-// RUN:   | sed -e s/@.*// > %t.exports
-//
-// [BEWARE: be really careful with the sed commands, as this test can be run
-//  from different environments with different shells and seds]
-//
-// RUN: sed ':a;N;$!ba;s/([\n ]*/(/g'                                          \
-// RUN:  %p/../../../../lib/asan/asan_interface.inc                            \
-// RUN:  %p/../../../../lib/ubsan/ubsan_interface.inc                          \
-// RUN:  %p/../../../../lib/sanitizer_common/sanitizer_common_interface.inc    \
-// RUN:  %p/../../../../lib/sanitizer_common/sanitizer_coverage_interface.inc  \
-// RUN:  | grep -e "^INTERFACE_FUNCTION"                                       \
-// RUN:  | sed -e "s/.*(//" -e "s/).*//" > %t.imports1
-//
-// RUN: sed ':a;N;$!ba;s/([\n ]*/(/g'                                          \
-// RUN:  %p/../../../../lib/asan/asan_interface.inc                            \
-// RUN:  %p/../../../../lib/ubsan/ubsan_interface.inc                          \
-// RUN:  %p/../../../../lib/sanitizer_common/sanitizer_common_interface.inc    \
-// RUN:  %p/../../../../lib/sanitizer_common/sanitizer_coverage_interface.inc  \
-// RUN:  | grep -e "^INTERFACE_WEAK_FUNCTION"                                  \
-// RUN:  | sed -e "s/.*(//" -e "s/).*/__dll/" > %t.imports2
-//
-// Add functions not included in the interface lists:
-// RUN: grep '[I]MPORT:' %s | sed -e 's/.*[I]MPORT: //' > %t.imports3
-// IMPORT: __asan_shadow_memory_dynamic_address
-// IMPORT: __asan_get_shadow_memory_dynamic_address
-// IMPORT: __asan_option_detect_stack_use_after_return
-// IMPORT: __asan_should_detect_stack_use_after_return
-// IMPORT: __asan_set_seh_filter
-// IMPORT: __asan_unhandled_exception_filter
-// IMPORT: __asan_test_only_reported_buggy_pointer
-// IMPORT: __ubsan_vptr_type_cache
-//
-// RUN: cat %t.imports1 %t.imports2 %t.imports3 | sort | uniq > %t.imports-sorted
-// RUN: cat %t.exports | sort | uniq > %t.exports-sorted
-//
-// Now make sure the DLL thunk imports everything:
-// RUN: echo
-// RUN: echo "=== NOTE === If you see a mismatch below, please update interface.inc files."
-// RUN: diff %t.imports-sorted %t.exports-sorted
-// REQUIRES: asan-static-runtime
-
-int main() { return 0; }
diff --git a/compiler-rt/test/asan/TestCases/Windows/malloc_left_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/malloc_left_oob.cpp
index 2ee5fdc..7ea95d2 100644
--- a/compiler-rt/test/asan/TestCases/Windows/malloc_left_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/malloc_left_oob.cpp
@@ -6,12 +6,12 @@
 int main() {
   char *buffer = (char*)malloc(42);
   buffer[-1] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*malloc_left_oob.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 1 bytes before 42-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* malloc }}
-// CHECK-NEXT: {{#1 .* main .*malloc_left_oob.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*malloc_left_oob.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 1 bytes before 42-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* malloc }}
+  // CHECK: {{ #[1-2] .* main .*malloc_left_oob.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/malloc_right_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/malloc_right_oob.cpp
index dafca74..1495632 100644
--- a/compiler-rt/test/asan/TestCases/Windows/malloc_right_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/malloc_right_oob.cpp
@@ -6,12 +6,12 @@
 int main() {
   char *buffer = (char*)malloc(42);
   buffer[42] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*malloc_right_oob.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes after 42-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* malloc }}
-// CHECK-NEXT: {{#1 .* main .*malloc_right_oob.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*malloc_right_oob.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes after 42-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* malloc }}
+  // CHECK: {{ #[1-2] .* main .*malloc_right_oob.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/malloc_uaf.cpp b/compiler-rt/test/asan/TestCases/Windows/malloc_uaf.cpp
index 256582d..d1eac7e 100644
--- a/compiler-rt/test/asan/TestCases/Windows/malloc_uaf.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/malloc_uaf.cpp
@@ -7,14 +7,14 @@ int main() {
   char *buffer = (char*)malloc(42);
   free(buffer);
   buffer[0] = 42;
-// CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*malloc_uaf.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes inside of 42-byte region
-// CHECK: freed by thread T0 here:
-// CHECK-NEXT: {{#0 .* free }}
-// CHECK-NEXT: {{#1 .* main .*malloc_uaf.cpp}}:[[@LINE-8]]
-// CHECK: previously allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* malloc }}
-// CHECK-NEXT: {{#1 .* main .*malloc_uaf.cpp}}:[[@LINE-12]]
+  // CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*malloc_uaf.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes inside of 42-byte region
+  // CHECK: freed by thread T0 here:
+  // CHECK-NEXT: {{#0 .* free }}
+  // CHECK: {{ #[1-2] .* main .*malloc_uaf.cpp}}:[[@LINE-8]]
+  // CHECK: previously allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* malloc }}
+  // CHECK: {{ #[1-2] .* main .*malloc_uaf.cpp}}:[[@LINE-12]]
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/realloc_left_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/realloc_left_oob.cpp
index 4a5a7ab..ebde5f1 100644
--- a/compiler-rt/test/asan/TestCases/Windows/realloc_left_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/realloc_left_oob.cpp
@@ -6,12 +6,12 @@
 int main() {
   char *buffer = (char*)realloc(0, 42);
   buffer[-1] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*realloc_left_oob.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 1 bytes before 42-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* realloc }}
-// CHECK-NEXT: {{#1 .* main .*realloc_left_oob.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*realloc_left_oob.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 1 bytes before 42-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* realloc }}
+  // CHECK: {{ #[1-2] .* main .*realloc_left_oob.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/realloc_right_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/realloc_right_oob.cpp
index 8f3109e..281efed 100644
--- a/compiler-rt/test/asan/TestCases/Windows/realloc_right_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/realloc_right_oob.cpp
@@ -6,12 +6,12 @@
 int main() {
   char *buffer = (char*)realloc(0, 42);
   buffer[42] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*realloc_right_oob.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes after 42-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* realloc }}
-// CHECK-NEXT: {{#1 .* main .*realloc_right_oob.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*realloc_right_oob.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes after 42-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* realloc }}
+  // CHECK: {{ #[1-2] .* main .*realloc_right_oob.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/realloc_uaf.cpp b/compiler-rt/test/asan/TestCases/Windows/realloc_uaf.cpp
index 074ac27..6ff2217 100644
--- a/compiler-rt/test/asan/TestCases/Windows/realloc_uaf.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/realloc_uaf.cpp
@@ -7,14 +7,14 @@ int main() {
   char *buffer = (char*)realloc(0, 42);
   free(buffer);
   buffer[0] = 42;
-// CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*realloc_uaf.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes inside of 42-byte region
-// CHECK: freed by thread T0 here:
-// CHECK-NEXT: {{#0 .* free }}
-// CHECK-NEXT: {{#1 .* main .*realloc_uaf.cpp}}:[[@LINE-8]]
-// CHECK: previously allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* realloc }}
-// CHECK-NEXT: {{#1 .* main .*realloc_uaf.cpp}}:[[@LINE-12]]
+  // CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*realloc_uaf.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes inside of 42-byte region
+  // CHECK: freed by thread T0 here:
+  // CHECK-NEXT: {{#0 .* free }}
+  // CHECK: {{ #[1-2] .* main .*realloc_uaf.cpp}}:[[@LINE-8]]
+  // CHECK: previously allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* realloc }}
+  // CHECK: {{ #[1-2] .* main .*realloc_uaf.cpp}}:[[@LINE-12]]
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/symbols_path.cpp b/compiler-rt/test/asan/TestCases/Windows/symbols_path.cpp
index d896da4..be99c89 100644
--- a/compiler-rt/test/asan/TestCases/Windows/symbols_path.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/symbols_path.cpp
@@ -11,12 +11,12 @@
 int main() {
   char *buffer = (char*)malloc(42);
   buffer[-1] = 42;
-// CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*symbols_path.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 1 bytes before 42-byte region
-// CHECK: allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* malloc}}
-// CHECK-NEXT: {{#1 .* main .*symbols_path.cpp}}:[[@LINE-8]]
+  // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*symbols_path.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 1 bytes before 42-byte region
+  // CHECK: allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* malloc}}
+  // CHECK: {{ #[1-2] .* main .*symbols_path.cpp}}:[[@LINE-8]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/Windows/use_after_realloc.cpp b/compiler-rt/test/asan/TestCases/Windows/use_after_realloc.cpp
index dfd25ce..4c32c63 100644
--- a/compiler-rt/test/asan/TestCases/Windows/use_after_realloc.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/use_after_realloc.cpp
@@ -9,15 +9,15 @@ int main() {
   buffer = (char*)realloc(buffer, 64);
   // The 'stale' may now point to a free'd memory.
   stale[0] = 42;
-// CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: WRITE of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT: {{#0 .* main .*use_after_realloc.cpp}}:[[@LINE-3]]
-// CHECK: [[ADDR]] is located 0 bytes inside of 32-byte region
-// CHECK: freed by thread T0 here:
-// CHECK-NEXT: {{#0 .* realloc }}
-// CHECK-NEXT: {{#1 .* main .*use_after_realloc.cpp}}:[[@LINE-9]]
-// CHECK: previously allocated by thread T0 here:
-// CHECK-NEXT: {{#0 .* realloc }}
-// CHECK-NEXT: {{#1 .* main .*use_after_realloc.cpp}}:[[@LINE-14]]
+  // CHECK: AddressSanitizer: heap-use-after-free on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: WRITE of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT: {{#0 .* main .*use_after_realloc.cpp}}:[[@LINE-3]]
+  // CHECK: [[ADDR]] is located 0 bytes inside of 32-byte region
+  // CHECK: freed by thread T0 here:
+  // CHECK-NEXT: {{#0 .* realloc }}
+  // CHECK: {{ #[1-2] .* main .*use_after_realloc.cpp}}:[[@LINE-9]]
+  // CHECK: previously allocated by thread T0 here:
+  // CHECK-NEXT: {{#0 .* realloc }}
+  // CHECK: {{ #[1-2] .* main .*use_after_realloc.cpp}}:[[@LINE-14]]
   free(buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/calloc-overflow.cpp b/compiler-rt/test/asan/TestCases/calloc-overflow.cpp
index 9a0d41f..b930b65 100644
--- a/compiler-rt/test/asan/TestCases/calloc-overflow.cpp
+++ b/compiler-rt/test/asan/TestCases/calloc-overflow.cpp
@@ -11,7 +11,7 @@ int main() {
   void *p = calloc(-1, 1000);
   // CHECK: {{ERROR: AddressSanitizer: calloc parameters overflow: count \* size \(.* \* 1000\) cannot be represented in type size_t}}
   // CHECK: {{#0 0x.* in .*calloc}}
-  // CHECK: {{#1 0x.* in main .*calloc-overflow.cpp:}}[[@LINE-3]]
+  // CHECK: {{#[1-3] 0x.* in main .*calloc-overflow.cpp:}}[[@LINE-3]]
   // CHECK: SUMMARY: AddressSanitizer: calloc-overflow
 
   printf("calloc returned: %zu\n", (size_t)p);
diff --git a/compiler-rt/test/asan/TestCases/deep_stack_uaf.cpp b/compiler-rt/test/asan/TestCases/deep_stack_uaf.cpp
index bdf0dbd..7d8a092 100644
--- a/compiler-rt/test/asan/TestCases/deep_stack_uaf.cpp
+++ b/compiler-rt/test/asan/TestCases/deep_stack_uaf.cpp
@@ -1,7 +1,7 @@
 // Check that we can store lots of stack frames if asked to.
 
 // RUN: %clangxx_asan -O0 %s -o %t 2>&1
-// RUN: %env_asan_opts=malloc_context_size=120:redzone=512 not %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=malloc_context_size=125:redzone=512 not %run %t 2>&1 | FileCheck %s
 // REQUIRES: stable-runtime
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/compiler-rt/test/asan/TestCases/double-free.cpp b/compiler-rt/test/asan/TestCases/double-free.cpp
index b01e410..7b61df07 100644
--- a/compiler-rt/test/asan/TestCases/double-free.cpp
+++ b/compiler-rt/test/asan/TestCases/double-free.cpp
@@ -19,10 +19,10 @@ int main(int argc, char **argv) {
   free(x + argc - 1);  // BOOM
   // CHECK: AddressSanitizer: attempting double-free{{.*}}in thread T0
   // CHECK: #0 0x{{.*}} in {{.*}}free
-  // CHECK: #1 0x{{.*}} in main {{.*}}double-free.cpp:[[@LINE-3]]
+  // CHECK: #{{[1-3]}} 0x{{.*}} in main {{.*}}double-free.cpp:[[@LINE-3]]
   // CHECK: freed by thread T0 here:
   // MALLOC-CTX: #0 0x{{.*}} in {{.*}}free
-  // MALLOC-CTX: #1 0x{{.*}} in main {{.*}}double-free.cpp:[[@LINE-7]]
+  // MALLOC-CTX: #{{[1-3]}} 0x{{.*}} in main {{.*}}double-free.cpp:[[@LINE-7]]
   // CHECK: allocated by thread T0 here:
   // MALLOC-CTX: double-free.cpp:[[@LINE-12]]
   // CHECK-RECOVER: AddressSanitizer: attempting double-free{{.*}}in thread T0
diff --git a/compiler-rt/test/asan/TestCases/malloc-size-too-big.cpp b/compiler-rt/test/asan/TestCases/malloc-size-too-big.cpp
index 71d6a3e..771640a 100644
--- a/compiler-rt/test/asan/TestCases/malloc-size-too-big.cpp
+++ b/compiler-rt/test/asan/TestCases/malloc-size-too-big.cpp
@@ -18,7 +18,7 @@ int main() {
   void *p = malloc(kMaxAllowedMallocSizePlusOne);
   // CHECK: {{ERROR: AddressSanitizer: requested allocation size .* \(.* after adjustments for alignment, red zones etc\.\) exceeds maximum supported size}}
   // CHECK: {{#0 0x.* in .*malloc}}
-  // CHECK: {{#1 0x.* in main .*malloc-size-too-big.cpp:}}[[@LINE-3]]
+  // CHECK: {{#[1-3] 0x.* in main .*malloc-size-too-big.cpp:}}[[@LINE-3]]
   // CHECK: SUMMARY: AddressSanitizer: allocation-size-too-big
 
   printf("malloc returned: %zu\n", (size_t)p);
diff --git a/compiler-rt/test/asan/TestCases/strncpy-overflow.cpp b/compiler-rt/test/asan/TestCases/strncpy-overflow.cpp
index d852cce..ff84052 100644
--- a/compiler-rt/test/asan/TestCases/strncpy-overflow.cpp
+++ b/compiler-rt/test/asan/TestCases/strncpy-overflow.cpp
@@ -33,6 +33,6 @@ int main(int argc, char **argv) {
   // CHECK: {{0x.* is located 0 bytes after 9-byte region}}
   // CHECK: {{allocated by thread T0 here:}}
   // CHECK: {{    #0 0x.* in .*malloc}}
-  // CHECK: {{    #1 0x.* in main .*strncpy-overflow.cpp:}}[[@LINE-8]]
+  // CHECK: {{    #[1-3] 0x.* in main .*strncpy-overflow.cpp:}}[[@LINE-8]]
   return rval + sink_memory(9, short_buffer);
 }
diff --git a/compiler-rt/test/asan/TestCases/use-after-free-right.cpp b/compiler-rt/test/asan/TestCases/use-after-free-right.cpp
index 10dbe5b..11011e4 100644
--- a/compiler-rt/test/asan/TestCases/use-after-free-right.cpp
+++ b/compiler-rt/test/asan/TestCases/use-after-free-right.cpp
@@ -19,9 +19,9 @@ int main() {
   // CHECK: {{0x.* is located 0 bytes inside of 1-byte region .0x.*,0x.*}}
   // CHECK: {{freed by thread T0 here:}}
   // CHECK: {{    #0 0x.* in .*free}}
-  // CHECK: {{    #1 0x.* in main .*use-after-free-right.cpp:}}[[@LINE-9]]
+  // CHECK: {{    #[1-3] 0x.* in main .*use-after-free-right.cpp:}}[[@LINE-9]]
 
   // CHECK: {{previously allocated by thread T0 here:}}
   // CHECK: {{    #0 0x.* in .*malloc}}
-  // CHECK: {{    #1 0x.* in main .*use-after-free-right.cpp:}}[[@LINE-14]]
+  // CHECK: {{    #[1-3] 0x.* in main .*use-after-free-right.cpp:}}[[@LINE-14]]
 }
diff --git a/compiler-rt/test/asan/TestCases/use-after-free.cpp b/compiler-rt/test/asan/TestCases/use-after-free.cpp
index 4b20ee2..f19c461 100644
--- a/compiler-rt/test/asan/TestCases/use-after-free.cpp
+++ b/compiler-rt/test/asan/TestCases/use-after-free.cpp
@@ -16,11 +16,11 @@ int main() {
   // CHECK: {{0x.* is located 5 bytes inside of 10-byte region .0x.*,0x.*}}
   // CHECK: {{freed by thread T0 here:}}
   // CHECK: {{    #0 0x.* in .*free}}
-  // CHECK: {{    #1 0x.* in main .*use-after-free.cpp:}}[[@LINE-9]]
+  // CHECK: {{    #[1-3] 0x.* in main .*use-after-free.cpp:}}[[@LINE-9]]
 
   // CHECK: {{previously allocated by thread T0 here:}}
   // CHECK: {{    #0 0x.* in .*malloc}}
-  // CHECK: {{    #1 0x.* in main .*use-after-free.cpp:}}[[@LINE-14]]
+  // CHECK: {{    #[1-3] 0x.* in main .*use-after-free.cpp:}}[[@LINE-14]]
   // CHECK: Shadow byte legend (one shadow byte represents {{[0-9]+}} application bytes):
   // CHECK: Global redzone:
   // CHECK: ASan internal:
diff --git a/compiler-rt/test/sanitizer_common/TestCases/corelimit.cpp b/compiler-rt/test/sanitizer_common/TestCases/corelimit.cpp
index fed2e1d..2378a4cf 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/corelimit.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/corelimit.cpp
@@ -10,12 +10,7 @@ int main() {
   getrlimit(RLIMIT_CORE, &lim_core);
   void *p;
   if (sizeof(p) == 8) {
-#ifdef __linux__
-    // See comments in DisableCoreDumperIfNecessary().
-    assert(lim_core.rlim_cur == 1);
-#else
-    assert(lim_core.rlim_cur == 0);
-#endif
+    assert(0 == lim_core.rlim_cur);
   }
   return 0;
 }
diff --git a/compiler-rt/test/tsan/signal_errno.cpp b/compiler-rt/test/tsan/signal_errno.cpp
index 7e1fd4b..99d4b6d 100644
--- a/compiler-rt/test/tsan/signal_errno.cpp
+++ b/compiler-rt/test/tsan/signal_errno.cpp
@@ -18,7 +18,7 @@ static void MyHandler(int, siginfo_t *s, void *c) {
 
 static void* sendsignal(void *p) {
   barrier_wait(&barrier);
-  pthread_kill(mainth, SIGPROF);
+  pthread_kill(mainth, SIGALRM);
   return 0;
 }
 
@@ -37,7 +37,7 @@ int main() {
   mainth = pthread_self();
   struct sigaction act = {};
   act.sa_sigaction = &MyHandler;
-  sigaction(SIGPROF, &act, 0);
+  sigaction(SIGALRM, &act, 0);
   pthread_t th;
   pthread_create(&th, 0, sendsignal, 0);
   loop();
@@ -46,7 +46,7 @@ int main() {
 }
 
 // CHECK: WARNING: ThreadSanitizer: signal handler spoils errno
-// CHECK:   Signal 27 handler invoked at:
+// CHECK:   Signal 14 handler invoked at:
 // CHECK:     #0 MyHandler(int, {{(__)?}}siginfo{{(_t)?}}*, void*) {{.*}}signal_errno.cpp
 // CHECK:     main
 // CHECK: SUMMARY: ThreadSanitizer: signal handler spoils errno{{.*}}MyHandler
diff --git a/compiler-rt/test/tsan/signal_sync.cpp b/compiler-rt/test/tsan/signal_sync.cpp
index b529a18..b283c93 100644
--- a/compiler-rt/test/tsan/signal_sync.cpp
+++ b/compiler-rt/test/tsan/signal_sync.cpp
@@ -30,7 +30,7 @@ int main() {
 
   struct sigaction act = {};
   act.sa_handler = &handler;
-  if (sigaction(SIGPROF, &act, 0)) {
+  if (sigaction(SIGALRM, &act, 0)) {
     perror("sigaction");
     exit(1);
   }
@@ -39,7 +39,7 @@ int main() {
   t.it_value.tv_sec = 0;
   t.it_value.tv_usec = 10;
   t.it_interval = t.it_value;
-  if (setitimer(ITIMER_PROF, &t, 0)) {
+  if (setitimer(ITIMER_REAL, &t, 0)) {
     perror("setitimer");
     exit(1);
   }
diff --git a/compiler-rt/test/tsan/signal_thread.cpp b/compiler-rt/test/tsan/signal_thread.cpp
index aa91d1d..e5ea441 100644
--- a/compiler-rt/test/tsan/signal_thread.cpp
+++ b/compiler-rt/test/tsan/signal_thread.cpp
@@ -24,7 +24,7 @@ static void* thr(void *p) {
 int main() {
   struct sigaction act = {};
   act.sa_handler = &handler;
-  if (sigaction(SIGPROF, &act, 0)) {
+  if (sigaction(SIGALRM, &act, 0)) {
     perror("sigaction");
     exit(1);
   }
@@ -33,7 +33,7 @@ int main() {
   t.it_value.tv_sec = 0;
   t.it_value.tv_usec = 10;
   t.it_interval = t.it_value;
-  if (setitimer(ITIMER_PROF, &t, 0)) {
+  if (setitimer(ITIMER_REAL, &t, 0)) {
     perror("setitimer");
     exit(1);
   }
diff --git a/compiler-rt/test/tsan/signal_thread2.cpp b/compiler-rt/test/tsan/signal_thread2.cpp
index 9bde4f7..5236628 100644
--- a/compiler-rt/test/tsan/signal_thread2.cpp
+++ b/compiler-rt/test/tsan/signal_thread2.cpp
@@ -40,7 +40,7 @@ static void *thr(void *p) {
 int main() {
   struct sigaction act = {};
   act.sa_handler = &handler;
-  if (sigaction(SIGPROF, &act, 0)) {
+  if (sigaction(SIGALRM, &act, 0)) {
     perror("sigaction");
     exit(1);
   }
@@ -49,7 +49,7 @@ int main() {
   t.it_value.tv_sec = 0;
   t.it_value.tv_usec = 10;
   t.it_interval = t.it_value;
-  if (setitimer(ITIMER_PROF, &t, 0)) {
+  if (setitimer(ITIMER_REAL, &t, 0)) {
     perror("setitimer");
     exit(1);
   }
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py
index b251f1a..1b0d4d5 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py
@@ -28,6 +28,7 @@ from dex.debugger.lldb.LLDB import LLDB
 from dex.debugger.visualstudio.VisualStudio2015 import VisualStudio2015
 from dex.debugger.visualstudio.VisualStudio2017 import VisualStudio2017
 from dex.debugger.visualstudio.VisualStudio2019 import VisualStudio2019
+from dex.debugger.visualstudio.VisualStudio2022 import VisualStudio2022
 
 
 def _get_potential_debuggers():  # noqa
@@ -41,6 +42,7 @@ def _get_potential_debuggers():  # noqa
         VisualStudio2015.get_option_name(): VisualStudio2015,
         VisualStudio2017.get_option_name(): VisualStudio2017,
         VisualStudio2019.get_option_name(): VisualStudio2019,
+        VisualStudio2022.get_option_name(): VisualStudio2022,
     }
 
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2022.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2022.py
new file mode 100644
index 0000000..6fcf8af
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2022.py
@@ -0,0 +1,23 @@
+# DExTer : Debugging Experience Tester
+# ~~~~~~   ~         ~~         ~   ~~
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Specializations for the Visual Studio 2022 interface."""
+
+from dex.debugger.visualstudio.VisualStudio import VisualStudio
+
+
+class VisualStudio2022(VisualStudio):
+    @classmethod
+    def get_name(cls):
+        return "Visual Studio 2022"
+
+    @classmethod
+    def get_option_name(cls):
+        return "vs2022"
+
+    @property
+    def _dte_version(self):
+        return "VisualStudio.DTE.17.0"
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test
index bbc9dd5..2bce540 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test
@@ -5,3 +5,5 @@ RUN: %dexter_base list-debuggers | FileCheck %s
 CHECK: lldb
 CHECK: vs2015
 CHECK: vs2017
+CHECK: vs2019
+CHECK: vs2022
diff --git a/flang/include/flang/Common/optional.h b/flang/include/flang/Common/optional.h
index 8903321..b5623b8 100644
--- a/flang/include/flang/Common/optional.h
+++ b/flang/include/flang/Common/optional.h
@@ -11,7 +11,7 @@
 // methods were added).
 //
 // The implementation defines optional in Fortran::common namespace.
-// This standalone implementation may beused if the target
+// This standalone implementation may be used if the target
 // does not support std::optional implementation (e.g. CUDA device env),
 // otherwise, Fortran::common::optional is an alias for std::optional.
 //
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 5389607..aba4002 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -430,6 +430,28 @@ template <typename A> std::optional<CoarrayRef> ExtractCoarrayRef(const A &x) {
   }
 }
 
+struct ExtractSubstringHelper {
+  template <typename T> static std::optional<Substring> visit(T &&) {
+    return std::nullopt;
+  }
+
+  static std::optional<Substring> visit(const Substring &e) { return e; }
+
+  template <typename T>
+  static std::optional<Substring> visit(const Designator<T> &e) {
+    return std::visit([](auto &&s) { return visit(s); }, e.u);
+  }
+
+  template <typename T>
+  static std::optional<Substring> visit(const Expr<T> &e) {
+    return std::visit([](auto &&s) { return visit(s); }, e.u);
+  }
+};
+
+template <typename A> std::optional<Substring> ExtractSubstring(const A &x) {
+  return ExtractSubstringHelper::visit(x);
+}
+
 // If an expression is simply a whole symbol data designator,
 // extract and return that symbol, else null.
 template <typename A> const Symbol *UnwrapWholeSymbolDataRef(const A &x) {
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index ca15b4b..6927488 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -208,6 +208,9 @@ struct IntrinsicLibrary {
   void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genCLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  template <mlir::arith::CmpIPredicate pred>
+  fir::ExtendedValue genCPtrCompare(mlir::Type,
+                                    llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genCosd(mlir::Type, llvm::ArrayRef<mlir::Value>);
   void genDateAndTime(llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genDim(mlir::Type, llvm::ArrayRef<mlir::Value>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
index c1a7247..fec8c99 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
@@ -30,6 +30,10 @@ mlir::Value genFraction(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Value genMod(fir::FirOpBuilder &builder, mlir::Location loc,
                    mlir::Value a, mlir::Value p);
 
+/// Generate call to Modulo intrinsic runtime routine.
+mlir::Value genModulo(fir::FirOpBuilder &builder, mlir::Location loc,
+                      mlir::Value a, mlir::Value p);
+
 /// Generate call to Nearest intrinsic runtime routine.
 mlir::Value genNearest(fir::FirOpBuilder &builder, mlir::Location loc,
                        mlir::Value x, mlir::Value s);
diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index cbf02ec..26097da 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -87,6 +87,9 @@ void populateFIRToLLVMConversionPatterns(fir::LLVMTypeConverter &converter,
                                          mlir::RewritePatternSet &patterns,
                                          fir::FIRToLLVMPassOptions &options);
 
+/// Populate the pattern set with the PreCGRewrite patterns.
+void populatePreCGRewritePatterns(mlir::RewritePatternSet &patterns);
+
 // declarative passes
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/CodeGen/CGPasses.h.inc"
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 65a86d2..f479263 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2454,6 +2454,7 @@ def fir_CUDAKernelLaunch : fir_Op<"cuda_kernel_launch", [CallOpInterface,
     SymbolRefAttr:$callee,
     I32:$grid_x,
     I32:$grid_y,
+    I32:$grid_z,
     I32:$block_x,
     I32:$block_y,
     I32:$block_z,
@@ -2463,8 +2464,8 @@ def fir_CUDAKernelLaunch : fir_Op<"cuda_kernel_launch", [CallOpInterface,
   );
 
   let assemblyFormat = [{
-    $callee `<` `<` `<` $grid_x `,` $grid_y `,` $block_x `,` $block_y `,`
-        $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
+    $callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
+        $block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
         `` `(` ( $args^ `:` type($args) )? `)` attr-dict
   }];
 
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 048008a..b2c3d92 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -233,6 +233,7 @@ public:
   NODE(parser, CriticalStmt)
   NODE(parser, CUDAAttributesStmt)
   NODE(parser, CUFKernelDoConstruct)
+  NODE(CUFKernelDoConstruct, StarOrExpr)
   NODE(CUFKernelDoConstruct, Directive)
   NODE(parser, CycleStmt)
   NODE(parser, DataComponentDefStmt)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index f7b72c3..c96abfb 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4297,16 +4297,18 @@ struct OpenACCConstruct {
 // CUF-kernel-do-construct ->
 //     !$CUF KERNEL DO [ (scalar-int-constant-expr) ] <<< grid, block [, stream]
 //     >>> do-construct
-// grid -> * | scalar-int-expr | ( scalar-int-expr-list )
-// block -> * | scalar-int-expr | ( scalar-int-expr-list )
+// star-or-expr -> * | scalar-int-expr
+// grid -> * | scalar-int-expr | ( star-or-expr-list )
+// block -> * | scalar-int-expr | ( star-or-expr-list )
 // stream -> 0, scalar-int-expr | STREAM = scalar-int-expr
 struct CUFKernelDoConstruct {
   TUPLE_CLASS_BOILERPLATE(CUFKernelDoConstruct);
+  WRAPPER_CLASS(StarOrExpr, std::optional<ScalarIntExpr>);
   struct Directive {
     TUPLE_CLASS_BOILERPLATE(Directive);
     CharBlock source;
-    std::tuple<std::optional<ScalarIntConstantExpr>, std::list<ScalarIntExpr>,
-        std::list<ScalarIntExpr>, std::optional<ScalarIntExpr>>
+    std::tuple<std::optional<ScalarIntConstantExpr>, std::list<StarOrExpr>,
+        std::list<StarOrExpr>, std::optional<ScalarIntExpr>>
         t;
   };
   std::tuple<Directive, std::optional<DoConstruct>> t;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a668ba4..e6511e0 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2508,19 +2508,32 @@ private:
     if (nestedLoops > 1)
       n = builder->getIntegerAttr(builder->getI64Type(), nestedLoops);
 
-    const std::list<Fortran::parser::ScalarIntExpr> &grid = std::get<1>(dir.t);
-    const std::list<Fortran::parser::ScalarIntExpr> &block = std::get<2>(dir.t);
+    const std::list<Fortran::parser::CUFKernelDoConstruct::StarOrExpr> &grid =
+        std::get<1>(dir.t);
+    const std::list<Fortran::parser::CUFKernelDoConstruct::StarOrExpr> &block =
+        std::get<2>(dir.t);
     const std::optional<Fortran::parser::ScalarIntExpr> &stream =
         std::get<3>(dir.t);
 
     llvm::SmallVector<mlir::Value> gridValues;
-    for (const Fortran::parser::ScalarIntExpr &expr : grid)
-      gridValues.push_back(fir::getBase(
-          genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx)));
+    for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : grid) {
+      if (expr.v) {
+        gridValues.push_back(fir::getBase(
+            genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
+      } else {
+        // TODO: '*'
+      }
+    }
     llvm::SmallVector<mlir::Value> blockValues;
-    for (const Fortran::parser::ScalarIntExpr &expr : block)
-      blockValues.push_back(fir::getBase(
-          genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx)));
+    for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+         block) {
+      if (expr.v) {
+        blockValues.push_back(fir::getBase(
+            genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
+      } else {
+        // TODO: '*'
+      }
+    }
     mlir::Value streamValue;
     if (stream)
       streamValue = fir::getBase(
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index 5577a60..f92d1a2 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -25,6 +25,7 @@ add_flang_library(FortranLower
   Mangler.cpp
   OpenACC.cpp
   OpenMP/ClauseProcessor.cpp
+  OpenMP/Clauses.cpp
   OpenMP/DataSharingProcessor.cpp
   OpenMP/OpenMP.cpp
   OpenMP/ReductionProcessor.cpp
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 6e3ce10..9556933 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -416,7 +416,7 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
     mlir::Type i32Ty = builder.getI32Type();
     mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
 
-    mlir::Value grid_x, grid_y;
+    mlir::Value grid_x, grid_y, grid_z;
     if (caller.getCallDescription().chevrons()[0].GetType()->category() ==
         Fortran::common::TypeCategory::Integer) {
       // If grid is an integer, it is converted to dim3(grid,1,1). Since z is
@@ -426,11 +426,13 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
           fir::getBase(converter.genExprValue(
               caller.getCallDescription().chevrons()[0], stmtCtx)));
       grid_y = one;
+      grid_z = one;
     } else {
       auto dim3Addr = converter.genExprAddr(
           caller.getCallDescription().chevrons()[0], stmtCtx);
       grid_x = readDim3Value(builder, loc, fir::getBase(dim3Addr), "x");
       grid_y = readDim3Value(builder, loc, fir::getBase(dim3Addr), "y");
+      grid_z = readDim3Value(builder, loc, fir::getBase(dim3Addr), "z");
     }
 
     mlir::Value block_x, block_y, block_z;
@@ -466,8 +468,8 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
               caller.getCallDescription().chevrons()[3], stmtCtx)));
 
     builder.create<fir::CUDAKernelLaunch>(
-        loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, block_x,
-        block_y, block_z, bytes, stream, operands);
+        loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, grid_z,
+        block_x, block_y, block_z, bytes, stream, operands);
     callNumResults = 0;
   } else if (caller.requireDispatchCall()) {
     // Procedure call requiring a dynamic dispatch. Call is created with
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index a41f831..6fefd04 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClauseProcessor.h"
+#include "Clauses.h"
 
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Parser/tools.h"
@@ -30,64 +31,55 @@ static void checkMapType(mlir::Location location, mlir::Type type) {
 }
 
 static mlir::omp::ScheduleModifier
-translateScheduleModifier(const Fortran::parser::OmpScheduleModifierType &m) {
-  switch (m.v) {
-  case Fortran::parser::OmpScheduleModifierType::ModType::Monotonic:
+translateScheduleModifier(const omp::clause::Schedule::ModType &m) {
+  switch (m) {
+  case omp::clause::Schedule::ModType::Monotonic:
     return mlir::omp::ScheduleModifier::monotonic;
-  case Fortran::parser::OmpScheduleModifierType::ModType::Nonmonotonic:
+  case omp::clause::Schedule::ModType::Nonmonotonic:
     return mlir::omp::ScheduleModifier::nonmonotonic;
-  case Fortran::parser::OmpScheduleModifierType::ModType::Simd:
+  case omp::clause::Schedule::ModType::Simd:
     return mlir::omp::ScheduleModifier::simd;
   }
   return mlir::omp::ScheduleModifier::none;
 }
 
 static mlir::omp::ScheduleModifier
-getScheduleModifier(const Fortran::parser::OmpScheduleClause &x) {
-  const auto &modifier =
-      std::get<std::optional<Fortran::parser::OmpScheduleModifier>>(x.t);
+getScheduleModifier(const omp::clause::Schedule &clause) {
+  using ScheduleModifier = omp::clause::Schedule::ScheduleModifier;
+  const auto &modifier = std::get<std::optional<ScheduleModifier>>(clause.t);
   // The input may have the modifier any order, so we look for one that isn't
   // SIMD. If modifier is not set at all, fall down to the bottom and return
   // "none".
   if (modifier) {
-    const auto &modType1 =
-        std::get<Fortran::parser::OmpScheduleModifier::Modifier1>(modifier->t);
-    if (modType1.v.v ==
-        Fortran::parser::OmpScheduleModifierType::ModType::Simd) {
-      const auto &modType2 = std::get<
-          std::optional<Fortran::parser::OmpScheduleModifier::Modifier2>>(
-          modifier->t);
-      if (modType2 &&
-          modType2->v.v !=
-              Fortran::parser::OmpScheduleModifierType::ModType::Simd)
-        return translateScheduleModifier(modType2->v);
-
+    using ModType = omp::clause::Schedule::ModType;
+    const auto &modType1 = std::get<ModType>(modifier->t);
+    if (modType1 == ModType::Simd) {
+      const auto &modType2 = std::get<std::optional<ModType>>(modifier->t);
+      if (modType2 && *modType2 != ModType::Simd)
+        return translateScheduleModifier(*modType2);
       return mlir::omp::ScheduleModifier::none;
     }
 
-    return translateScheduleModifier(modType1.v);
+    return translateScheduleModifier(modType1);
   }
   return mlir::omp::ScheduleModifier::none;
 }
 
 static mlir::omp::ScheduleModifier
-getSimdModifier(const Fortran::parser::OmpScheduleClause &x) {
-  const auto &modifier =
-      std::get<std::optional<Fortran::parser::OmpScheduleModifier>>(x.t);
+getSimdModifier(const omp::clause::Schedule &clause) {
+  using ScheduleModifier = omp::clause::Schedule::ScheduleModifier;
+  const auto &modifier = std::get<std::optional<ScheduleModifier>>(clause.t);
   // Either of the two possible modifiers in the input can be the SIMD modifier,
   // so look in either one, and return simd if we find one. Not found = return
   // "none".
   if (modifier) {
-    const auto &modType1 =
-        std::get<Fortran::parser::OmpScheduleModifier::Modifier1>(modifier->t);
-    if (modType1.v.v == Fortran::parser::OmpScheduleModifierType::ModType::Simd)
+    using ModType = omp::clause::Schedule::ModType;
+    const auto &modType1 = std::get<ModType>(modifier->t);
+    if (modType1 == ModType::Simd)
       return mlir::omp::ScheduleModifier::simd;
 
-    const auto &modType2 = std::get<
-        std::optional<Fortran::parser::OmpScheduleModifier::Modifier2>>(
-        modifier->t);
-    if (modType2 && modType2->v.v ==
-                        Fortran::parser::OmpScheduleModifierType::ModType::Simd)
+    const auto &modType2 = std::get<std::optional<ModType>>(modifier->t);
+    if (modType2 && *modType2 == ModType::Simd)
       return mlir::omp::ScheduleModifier::simd;
   }
   return mlir::omp::ScheduleModifier::none;
@@ -95,29 +87,25 @@ getSimdModifier(const Fortran::parser::OmpScheduleClause &x) {
 
 static void
 genAllocateClause(Fortran::lower::AbstractConverter &converter,
-                  const Fortran::parser::OmpAllocateClause &ompAllocateClause,
+                  const omp::clause::Allocate &clause,
                   llvm::SmallVectorImpl<mlir::Value> &allocatorOperands,
                   llvm::SmallVectorImpl<mlir::Value> &allocateOperands) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::Location currentLocation = converter.getCurrentLocation();
   Fortran::lower::StatementContext stmtCtx;
 
-  mlir::Value allocatorOperand;
-  const Fortran::parser::OmpObjectList &ompObjectList =
-      std::get<Fortran::parser::OmpObjectList>(ompAllocateClause.t);
-  const auto &allocateModifier = std::get<
-      std::optional<Fortran::parser::OmpAllocateClause::AllocateModifier>>(
-      ompAllocateClause.t);
+  const omp::ObjectList &objectList = std::get<omp::ObjectList>(clause.t);
+  const auto &modifier =
+      std::get<std::optional<omp::clause::Allocate::Modifier>>(clause.t);
 
   // If the allocate modifier is present, check if we only use the allocator
   // submodifier.  ALIGN in this context is unimplemented
   const bool onlyAllocator =
-      allocateModifier &&
-      std::holds_alternative<
-          Fortran::parser::OmpAllocateClause::AllocateModifier::Allocator>(
-          allocateModifier->u);
+      modifier &&
+      std::holds_alternative<omp::clause::Allocate::Modifier::Allocator>(
+          modifier->u);
 
-  if (allocateModifier && !onlyAllocator) {
+  if (modifier && !onlyAllocator) {
     TODO(currentLocation, "OmpAllocateClause ALIGN modifier");
   }
 
@@ -125,37 +113,34 @@ genAllocateClause(Fortran::lower::AbstractConverter &converter,
   // to list of allocators, otherwise, add default allocator to
   // list of allocators.
   if (onlyAllocator) {
-    const auto &allocatorValue = std::get<
-        Fortran::parser::OmpAllocateClause::AllocateModifier::Allocator>(
-        allocateModifier->u);
-    allocatorOperand = fir::getBase(converter.genExprValue(
-        *Fortran::semantics::GetExpr(allocatorValue.v), stmtCtx));
-    allocatorOperands.insert(allocatorOperands.end(), ompObjectList.v.size(),
-                             allocatorOperand);
+    const auto &value =
+        std::get<omp::clause::Allocate::Modifier::Allocator>(modifier->u);
+    mlir::Value operand =
+        fir::getBase(converter.genExprValue(value.v, stmtCtx));
+    allocatorOperands.append(objectList.size(), operand);
   } else {
-    allocatorOperand = firOpBuilder.createIntegerConstant(
+    mlir::Value operand = firOpBuilder.createIntegerConstant(
         currentLocation, firOpBuilder.getI32Type(), 1);
-    allocatorOperands.insert(allocatorOperands.end(), ompObjectList.v.size(),
-                             allocatorOperand);
+    allocatorOperands.append(objectList.size(), operand);
   }
-  genObjectList(ompObjectList, converter, allocateOperands);
+  genObjectList(objectList, converter, allocateOperands);
 }
 
-static mlir::omp::ClauseProcBindKindAttr genProcBindKindAttr(
-    fir::FirOpBuilder &firOpBuilder,
-    const Fortran::parser::OmpClause::ProcBind *procBindClause) {
+static mlir::omp::ClauseProcBindKindAttr
+genProcBindKindAttr(fir::FirOpBuilder &firOpBuilder,
+                    const omp::clause::ProcBind &clause) {
   mlir::omp::ClauseProcBindKind procBindKind;
-  switch (procBindClause->v.v) {
-  case Fortran::parser::OmpProcBindClause::Type::Master:
+  switch (clause.v) {
+  case omp::clause::ProcBind::Type::Master:
     procBindKind = mlir::omp::ClauseProcBindKind::Master;
     break;
-  case Fortran::parser::OmpProcBindClause::Type::Close:
+  case omp::clause::ProcBind::Type::Close:
     procBindKind = mlir::omp::ClauseProcBindKind::Close;
     break;
-  case Fortran::parser::OmpProcBindClause::Type::Spread:
+  case omp::clause::ProcBind::Type::Spread:
     procBindKind = mlir::omp::ClauseProcBindKind::Spread;
     break;
-  case Fortran::parser::OmpProcBindClause::Type::Primary:
+  case omp::clause::ProcBind::Type::Primary:
     procBindKind = mlir::omp::ClauseProcBindKind::Primary;
     break;
   }
@@ -165,20 +150,17 @@ static mlir::omp::ClauseProcBindKindAttr genProcBindKindAttr(
 
 static mlir::omp::ClauseTaskDependAttr
 genDependKindAttr(fir::FirOpBuilder &firOpBuilder,
-                  const Fortran::parser::OmpClause::Depend *dependClause) {
+                  const omp::clause::Depend &clause) {
   mlir::omp::ClauseTaskDepend pbKind;
-  switch (
-      std::get<Fortran::parser::OmpDependenceType>(
-          std::get<Fortran::parser::OmpDependClause::InOut>(dependClause->v.u)
-              .t)
-          .v) {
-  case Fortran::parser::OmpDependenceType::Type::In:
+  const auto &inOut = std::get<omp::clause::Depend::InOut>(clause.u);
+  switch (std::get<omp::clause::Depend::Type>(inOut.t)) {
+  case omp::clause::Depend::Type::In:
     pbKind = mlir::omp::ClauseTaskDepend::taskdependin;
     break;
-  case Fortran::parser::OmpDependenceType::Type::Out:
+  case omp::clause::Depend::Type::Out:
     pbKind = mlir::omp::ClauseTaskDepend::taskdependout;
     break;
-  case Fortran::parser::OmpDependenceType::Type::Inout:
+  case omp::clause::Depend::Type::Inout:
     pbKind = mlir::omp::ClauseTaskDepend::taskdependinout;
     break;
   default:
@@ -189,45 +171,41 @@ genDependKindAttr(fir::FirOpBuilder &firOpBuilder,
                                               pbKind);
 }
 
-static mlir::Value getIfClauseOperand(
-    Fortran::lower::AbstractConverter &converter,
-    const Fortran::parser::OmpClause::If *ifClause,
-    Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName,
-    mlir::Location clauseLocation) {
+static mlir::Value
+getIfClauseOperand(Fortran::lower::AbstractConverter &converter,
+                   const omp::clause::If &clause,
+                   omp::clause::If::DirectiveNameModifier directiveName,
+                   mlir::Location clauseLocation) {
   // Only consider the clause if it's intended for the given directive.
-  auto &directive = std::get<
-      std::optional<Fortran::parser::OmpIfClause::DirectiveNameModifier>>(
-      ifClause->v.t);
+  auto &directive =
+      std::get<std::optional<omp::clause::If::DirectiveNameModifier>>(clause.t);
   if (directive && directive.value() != directiveName)
     return nullptr;
 
   Fortran::lower::StatementContext stmtCtx;
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
   mlir::Value ifVal = fir::getBase(
-      converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
+      converter.genExprValue(std::get<omp::SomeExpr>(clause.t), stmtCtx));
   return firOpBuilder.createConvert(clauseLocation, firOpBuilder.getI1Type(),
                                     ifVal);
 }
 
 static void
 addUseDeviceClause(Fortran::lower::AbstractConverter &converter,
-                   const Fortran::parser::OmpObjectList &useDeviceClause,
+                   const omp::ObjectList &objects,
                    llvm::SmallVectorImpl<mlir::Value> &operands,
                    llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
                    llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
                    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
                        &useDeviceSymbols) {
-  genObjectList(useDeviceClause, converter, operands);
+  genObjectList(objects, converter, operands);
   for (mlir::Value &operand : operands) {
     checkMapType(operand.getLoc(), operand.getType());
     useDeviceTypes.push_back(operand.getType());
     useDeviceLocs.push_back(operand.getLoc());
   }
-  for (const Fortran::parser::OmpObject &ompObject : useDeviceClause.v) {
-    Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
-    useDeviceSymbols.push_back(sym);
-  }
+  for (const omp::Object &object : objects)
+    useDeviceSymbols.push_back(object.id());
 }
 
 //===----------------------------------------------------------------------===//
@@ -253,9 +231,8 @@ bool ClauseProcessor::processCollapse(
   }
 
   std::int64_t collapseValue = 1l;
-  if (auto *collapseClause = findUniqueClause<ClauseTy::Collapse>()) {
-    const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
-    collapseValue = Fortran::evaluate::ToInt64(*expr).value();
+  if (auto *clause = findUniqueClause<omp::clause::Collapse>()) {
+    collapseValue = Fortran::evaluate::ToInt64(clause->v).value();
     found = true;
   }
 
@@ -294,19 +271,19 @@ bool ClauseProcessor::processCollapse(
 }
 
 bool ClauseProcessor::processDefault() const {
-  if (auto *defaultClause = findUniqueClause<ClauseTy::Default>()) {
+  if (auto *clause = findUniqueClause<omp::clause::Default>()) {
     // Private, Firstprivate, Shared, None
-    switch (defaultClause->v.v) {
-    case Fortran::parser::OmpDefaultClause::Type::Shared:
-    case Fortran::parser::OmpDefaultClause::Type::None:
+    switch (clause->v) {
+    case omp::clause::Default::Type::Shared:
+    case omp::clause::Default::Type::None:
       // Default clause with shared or none do not require any handling since
       // Shared is the default behavior in the IR and None is only required
       // for semantic checks.
       break;
-    case Fortran::parser::OmpDefaultClause::Type::Private:
+    case omp::clause::Default::Type::Private:
       // TODO Support default(private)
       break;
-    case Fortran::parser::OmpDefaultClause::Type::Firstprivate:
+    case omp::clause::Default::Type::Firstprivate:
       // TODO Support default(firstprivate)
       break;
     }
@@ -318,20 +295,17 @@ bool ClauseProcessor::processDefault() const {
 bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx,
                                     mlir::Value &result) const {
   const Fortran::parser::CharBlock *source = nullptr;
-  if (auto *deviceClause = findUniqueClause<ClauseTy::Device>(&source)) {
+  if (auto *clause = findUniqueClause<omp::clause::Device>(&source)) {
     mlir::Location clauseLocation = converter.genLocation(*source);
-    if (auto deviceModifier = std::get<
-            std::optional<Fortran::parser::OmpDeviceClause::DeviceModifier>>(
-            deviceClause->v.t)) {
-      if (deviceModifier ==
-          Fortran::parser::OmpDeviceClause::DeviceModifier::Ancestor) {
+    if (auto deviceModifier =
+            std::get<std::optional<omp::clause::Device::DeviceModifier>>(
+                clause->t)) {
+      if (deviceModifier == omp::clause::Device::DeviceModifier::Ancestor) {
         TODO(clauseLocation, "OMPD_target Device Modifier Ancestor");
       }
     }
-    if (const auto *deviceExpr = Fortran::semantics::GetExpr(
-            std::get<Fortran::parser::ScalarIntExpr>(deviceClause->v.t))) {
-      result = fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx));
-    }
+    const auto &deviceExpr = std::get<omp::SomeExpr>(clause->t);
+    result = fir::getBase(converter.genExprValue(deviceExpr, stmtCtx));
     return true;
   }
   return false;
@@ -339,16 +313,16 @@ bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx,
 
 bool ClauseProcessor::processDeviceType(
     mlir::omp::DeclareTargetDeviceType &result) const {
-  if (auto *deviceTypeClause = findUniqueClause<ClauseTy::DeviceType>()) {
+  if (auto *clause = findUniqueClause<omp::clause::DeviceType>()) {
     // Case: declare target ... device_type(any | host | nohost)
-    switch (deviceTypeClause->v.v) {
-    case Fortran::parser::OmpDeviceTypeClause::Type::Nohost:
+    switch (clause->v) {
+    case omp::clause::DeviceType::Type::Nohost:
       result = mlir::omp::DeclareTargetDeviceType::nohost;
       break;
-    case Fortran::parser::OmpDeviceTypeClause::Type::Host:
+    case omp::clause::DeviceType::Type::Host:
       result = mlir::omp::DeclareTargetDeviceType::host;
       break;
-    case Fortran::parser::OmpDeviceTypeClause::Type::Any:
+    case omp::clause::DeviceType::Type::Any:
       result = mlir::omp::DeclareTargetDeviceType::any;
       break;
     }
@@ -360,12 +334,12 @@ bool ClauseProcessor::processDeviceType(
 bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx,
                                    mlir::Value &result) const {
   const Fortran::parser::CharBlock *source = nullptr;
-  if (auto *finalClause = findUniqueClause<ClauseTy::Final>(&source)) {
+  if (auto *clause = findUniqueClause<omp::clause::Final>(&source)) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
     mlir::Location clauseLocation = converter.genLocation(*source);
 
-    mlir::Value finalVal = fir::getBase(converter.genExprValue(
-        *Fortran::semantics::GetExpr(finalClause->v), stmtCtx));
+    mlir::Value finalVal =
+        fir::getBase(converter.genExprValue(clause->v, stmtCtx));
     result = firOpBuilder.createConvert(clauseLocation,
                                         firOpBuilder.getI1Type(), finalVal);
     return true;
@@ -374,10 +348,9 @@ bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx,
 }
 
 bool ClauseProcessor::processHint(mlir::IntegerAttr &result) const {
-  if (auto *hintClause = findUniqueClause<ClauseTy::Hint>()) {
+  if (auto *clause = findUniqueClause<omp::clause::Hint>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-    const auto *expr = Fortran::semantics::GetExpr(hintClause->v);
-    int64_t hintValue = *Fortran::evaluate::ToInt64(*expr);
+    int64_t hintValue = *Fortran::evaluate::ToInt64(clause->v);
     result = firOpBuilder.getI64IntegerAttr(hintValue);
     return true;
   }
@@ -385,20 +358,19 @@ bool ClauseProcessor::processHint(mlir::IntegerAttr &result) const {
 }
 
 bool ClauseProcessor::processMergeable(mlir::UnitAttr &result) const {
-  return markClauseOccurrence<ClauseTy::Mergeable>(result);
+  return markClauseOccurrence<omp::clause::Mergeable>(result);
 }
 
 bool ClauseProcessor::processNowait(mlir::UnitAttr &result) const {
-  return markClauseOccurrence<ClauseTy::Nowait>(result);
+  return markClauseOccurrence<omp::clause::Nowait>(result);
 }
 
 bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx,
                                       mlir::Value &result) const {
   // TODO Get lower and upper bounds for num_teams when parser is updated to
   // accept both.
-  if (auto *numTeamsClause = findUniqueClause<ClauseTy::NumTeams>()) {
-    result = fir::getBase(converter.genExprValue(
-        *Fortran::semantics::GetExpr(numTeamsClause->v), stmtCtx));
+  if (auto *clause = findUniqueClause<omp::clause::NumTeams>()) {
+    result = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
     return true;
   }
   return false;
@@ -406,23 +378,20 @@ bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx,
 
 bool ClauseProcessor::processNumThreads(
     Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const {
-  if (auto *numThreadsClause = findUniqueClause<ClauseTy::NumThreads>()) {
+  if (auto *clause = findUniqueClause<omp::clause::NumThreads>()) {
     // OMPIRBuilder expects `NUM_THREADS` clause as a `Value`.
-    result = fir::getBase(converter.genExprValue(
-        *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
+    result = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
     return true;
   }
   return false;
 }
 
 bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const {
-  if (auto *orderedClause = findUniqueClause<ClauseTy::Ordered>()) {
+  if (auto *clause = findUniqueClause<omp::clause::Ordered>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
     int64_t orderedClauseValue = 0l;
-    if (orderedClause->v.has_value()) {
-      const auto *expr = Fortran::semantics::GetExpr(orderedClause->v);
-      orderedClauseValue = *Fortran::evaluate::ToInt64(*expr);
-    }
+    if (clause->v.has_value())
+      orderedClauseValue = *Fortran::evaluate::ToInt64(*clause->v);
     result = firOpBuilder.getI64IntegerAttr(orderedClauseValue);
     return true;
   }
@@ -431,9 +400,8 @@ bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const {
 
 bool ClauseProcessor::processPriority(Fortran::lower::StatementContext &stmtCtx,
                                       mlir::Value &result) const {
-  if (auto *priorityClause = findUniqueClause<ClauseTy::Priority>()) {
-    result = fir::getBase(converter.genExprValue(
-        *Fortran::semantics::GetExpr(priorityClause->v), stmtCtx));
+  if (auto *clause = findUniqueClause<omp::clause::Priority>()) {
+    result = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
     return true;
   }
   return false;
@@ -441,20 +409,19 @@ bool ClauseProcessor::processPriority(Fortran::lower::StatementContext &stmtCtx,
 
 bool ClauseProcessor::processProcBind(
     mlir::omp::ClauseProcBindKindAttr &result) const {
-  if (auto *procBindClause = findUniqueClause<ClauseTy::ProcBind>()) {
+  if (auto *clause = findUniqueClause<omp::clause::ProcBind>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-    result = genProcBindKindAttr(firOpBuilder, procBindClause);
+    result = genProcBindKindAttr(firOpBuilder, *clause);
     return true;
   }
   return false;
 }
 
 bool ClauseProcessor::processSafelen(mlir::IntegerAttr &result) const {
-  if (auto *safelenClause = findUniqueClause<ClauseTy::Safelen>()) {
+  if (auto *clause = findUniqueClause<omp::clause::Safelen>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-    const auto *expr = Fortran::semantics::GetExpr(safelenClause->v);
     const std::optional<std::int64_t> safelenVal =
-        Fortran::evaluate::ToInt64(*expr);
+        Fortran::evaluate::ToInt64(clause->v);
     result = firOpBuilder.getI64IntegerAttr(*safelenVal);
     return true;
   }
@@ -465,41 +432,38 @@ bool ClauseProcessor::processSchedule(
     mlir::omp::ClauseScheduleKindAttr &valAttr,
     mlir::omp::ScheduleModifierAttr &modifierAttr,
     mlir::UnitAttr &simdModifierAttr) const {
-  if (auto *scheduleClause = findUniqueClause<ClauseTy::Schedule>()) {
+  if (auto *clause = findUniqueClause<omp::clause::Schedule>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
     mlir::MLIRContext *context = firOpBuilder.getContext();
-    const Fortran::parser::OmpScheduleClause &scheduleType = scheduleClause->v;
-    const auto &scheduleClauseKind =
-        std::get<Fortran::parser::OmpScheduleClause::ScheduleType>(
-            scheduleType.t);
+    const auto &scheduleType =
+        std::get<omp::clause::Schedule::ScheduleType>(clause->t);
 
     mlir::omp::ClauseScheduleKind scheduleKind;
-    switch (scheduleClauseKind) {
-    case Fortran::parser::OmpScheduleClause::ScheduleType::Static:
+    switch (scheduleType) {
+    case omp::clause::Schedule::ScheduleType::Static:
       scheduleKind = mlir::omp::ClauseScheduleKind::Static;
       break;
-    case Fortran::parser::OmpScheduleClause::ScheduleType::Dynamic:
+    case omp::clause::Schedule::ScheduleType::Dynamic:
       scheduleKind = mlir::omp::ClauseScheduleKind::Dynamic;
       break;
-    case Fortran::parser::OmpScheduleClause::ScheduleType::Guided:
+    case omp::clause::Schedule::ScheduleType::Guided:
       scheduleKind = mlir::omp::ClauseScheduleKind::Guided;
       break;
-    case Fortran::parser::OmpScheduleClause::ScheduleType::Auto:
+    case omp::clause::Schedule::ScheduleType::Auto:
       scheduleKind = mlir::omp::ClauseScheduleKind::Auto;
       break;
-    case Fortran::parser::OmpScheduleClause::ScheduleType::Runtime:
+    case omp::clause::Schedule::ScheduleType::Runtime:
       scheduleKind = mlir::omp::ClauseScheduleKind::Runtime;
       break;
     }
 
-    mlir::omp::ScheduleModifier scheduleModifier =
-        getScheduleModifier(scheduleClause->v);
+    mlir::omp::ScheduleModifier scheduleModifier = getScheduleModifier(*clause);
 
     if (scheduleModifier != mlir::omp::ScheduleModifier::none)
       modifierAttr =
           mlir::omp::ScheduleModifierAttr::get(context, scheduleModifier);
 
-    if (getSimdModifier(scheduleClause->v) != mlir::omp::ScheduleModifier::none)
+    if (getSimdModifier(*clause) != mlir::omp::ScheduleModifier::none)
       simdModifierAttr = firOpBuilder.getUnitAttr();
 
     valAttr = mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind);
@@ -510,25 +474,19 @@ bool ClauseProcessor::processSchedule(
 
 bool ClauseProcessor::processScheduleChunk(
     Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const {
-  if (auto *scheduleClause = findUniqueClause<ClauseTy::Schedule>()) {
-    if (const auto &chunkExpr =
-            std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
-                scheduleClause->v.t)) {
-      if (const auto *expr = Fortran::semantics::GetExpr(*chunkExpr)) {
-        result = fir::getBase(converter.genExprValue(*expr, stmtCtx));
-      }
-    }
+  if (auto *clause = findUniqueClause<omp::clause::Schedule>()) {
+    if (const auto &chunkExpr = std::get<omp::MaybeExpr>(clause->t))
+      result = fir::getBase(converter.genExprValue(*chunkExpr, stmtCtx));
     return true;
   }
   return false;
 }
 
 bool ClauseProcessor::processSimdlen(mlir::IntegerAttr &result) const {
-  if (auto *simdlenClause = findUniqueClause<ClauseTy::Simdlen>()) {
+  if (auto *clause = findUniqueClause<omp::clause::Simdlen>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-    const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v);
     const std::optional<std::int64_t> simdlenVal =
-        Fortran::evaluate::ToInt64(*expr);
+        Fortran::evaluate::ToInt64(clause->v);
     result = firOpBuilder.getI64IntegerAttr(*simdlenVal);
     return true;
   }
@@ -537,16 +495,15 @@ bool ClauseProcessor::processSimdlen(mlir::IntegerAttr &result) const {
 
 bool ClauseProcessor::processThreadLimit(
     Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const {
-  if (auto *threadLmtClause = findUniqueClause<ClauseTy::ThreadLimit>()) {
-    result = fir::getBase(converter.genExprValue(
-        *Fortran::semantics::GetExpr(threadLmtClause->v), stmtCtx));
+  if (auto *clause = findUniqueClause<omp::clause::ThreadLimit>()) {
+    result = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
     return true;
   }
   return false;
 }
 
 bool ClauseProcessor::processUntied(mlir::UnitAttr &result) const {
-  return markClauseOccurrence<ClauseTy::Untied>(result);
+  return markClauseOccurrence<omp::clause::Untied>(result);
 }
 
 //===----------------------------------------------------------------------===//
@@ -556,10 +513,10 @@ bool ClauseProcessor::processUntied(mlir::UnitAttr &result) const {
 bool ClauseProcessor::processAllocate(
     llvm::SmallVectorImpl<mlir::Value> &allocatorOperands,
     llvm::SmallVectorImpl<mlir::Value> &allocateOperands) const {
-  return findRepeatableClause<ClauseTy::Allocate>(
-      [&](const ClauseTy::Allocate *allocateClause,
+  return findRepeatableClause<omp::clause::Allocate>(
+      [&](const omp::clause::Allocate &clause,
           const Fortran::parser::CharBlock &) {
-        genAllocateClause(converter, allocateClause->v, allocatorOperands,
+        genAllocateClause(converter, clause, allocatorOperands,
                           allocateOperands);
       });
 }
@@ -576,12 +533,12 @@ bool ClauseProcessor::processCopyin() const {
         if (converter.isPresentShallowLookup(*sym))
           converter.copyHostAssociateVar(*sym, copyAssignIP);
       };
-  bool hasCopyin = findRepeatableClause<ClauseTy::Copyin>(
-      [&](const ClauseTy::Copyin *copyinClause,
+  bool hasCopyin = findRepeatableClause<omp::clause::Copyin>(
+      [&](const omp::clause::Copyin &clause,
           const Fortran::parser::CharBlock &) {
-        const Fortran::parser::OmpObjectList &ompObjectList = copyinClause->v;
-        for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
-          Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
+        for (const omp::Object &object : clause.v) {
+          Fortran::semantics::Symbol *sym = object.id();
+          assert(sym && "Expecting symbol");
           if (const auto *commonDetails =
                   sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
             for (const auto &mem : commonDetails->objects())
@@ -745,13 +702,11 @@ bool ClauseProcessor::processCopyPrivate(
     copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp));
   };
 
-  bool hasCopyPrivate = findRepeatableClause<ClauseTy::Copyprivate>(
-      [&](const ClauseTy::Copyprivate *copyPrivateClause,
+  bool hasCopyPrivate = findRepeatableClause<clause::Copyprivate>(
+      [&](const clause::Copyprivate &clause,
           const Fortran::parser::CharBlock &) {
-        const Fortran::parser::OmpObjectList &ompObjectList =
-            copyPrivateClause->v;
-        for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
-          Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
+        for (const Object &object : clause.v) {
+          Fortran::semantics::Symbol *sym = object.id();
           if (const auto *commonDetails =
                   sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
             for (const auto &mem : commonDetails->objects())
@@ -770,38 +725,30 @@ bool ClauseProcessor::processDepend(
     llvm::SmallVectorImpl<mlir::Value> &dependOperands) const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  return findRepeatableClause<ClauseTy::Depend>(
-      [&](const ClauseTy::Depend *dependClause,
+  return findRepeatableClause<omp::clause::Depend>(
+      [&](const omp::clause::Depend &clause,
           const Fortran::parser::CharBlock &) {
-        const std::list<Fortran::parser::Designator> &depVal =
-            std::get<std::list<Fortran::parser::Designator>>(
-                std::get<Fortran::parser::OmpDependClause::InOut>(
-                    dependClause->v.u)
-                    .t);
+        assert(std::holds_alternative<omp::clause::Depend::InOut>(clause.u) &&
+               "Only InOut is handled at the moment");
+        const auto &inOut = std::get<omp::clause::Depend::InOut>(clause.u);
+        const auto &objects = std::get<omp::ObjectList>(inOut.t);
+
         mlir::omp::ClauseTaskDependAttr dependTypeOperand =
-            genDependKindAttr(firOpBuilder, dependClause);
-        dependTypeOperands.insert(dependTypeOperands.end(), depVal.size(),
-                                  dependTypeOperand);
-        for (const Fortran::parser::Designator &ompObject : depVal) {
-          Fortran::semantics::Symbol *sym = nullptr;
-          std::visit(
-              Fortran::common::visitors{
-                  [&](const Fortran::parser::DataRef &designator) {
-                    if (const Fortran::parser::Name *name =
-                            std::get_if<Fortran::parser::Name>(&designator.u)) {
-                      sym = name->symbol;
-                    } else if (std::get_if<Fortran::common::Indirection<
-                                   Fortran::parser::ArrayElement>>(
-                                   &designator.u)) {
-                      TODO(converter.getCurrentLocation(),
-                           "array sections not supported for task depend");
-                    }
-                  },
-                  [&](const Fortran::parser::Substring &designator) {
-                    TODO(converter.getCurrentLocation(),
-                         "substring not supported for task depend");
-                  }},
-              (ompObject).u);
+            genDependKindAttr(firOpBuilder, clause);
+        dependTypeOperands.append(objects.size(), dependTypeOperand);
+
+        for (const omp::Object &object : objects) {
+          assert(object.ref() && "Expecting designator");
+
+          if (Fortran::evaluate::ExtractSubstring(*object.ref())) {
+            TODO(converter.getCurrentLocation(),
+                 "substring not supported for task depend");
+          } else if (Fortran::evaluate::IsArrayElement(*object.ref())) {
+            TODO(converter.getCurrentLocation(),
+                 "array sections not supported for task depend");
+          }
+
+          Fortran::semantics::Symbol *sym = object.id();
           const mlir::Value variable = converter.getSymbolAddress(*sym);
           dependOperands.push_back(variable);
         }
@@ -809,14 +756,14 @@ bool ClauseProcessor::processDepend(
 }
 
 bool ClauseProcessor::processIf(
-    Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName,
+    omp::clause::If::DirectiveNameModifier directiveName,
     mlir::Value &result) const {
   bool found = false;
-  findRepeatableClause<ClauseTy::If>(
-      [&](const ClauseTy::If *ifClause,
+  findRepeatableClause<omp::clause::If>(
+      [&](const omp::clause::If &clause,
           const Fortran::parser::CharBlock &source) {
         mlir::Location clauseLocation = converter.genLocation(source);
-        mlir::Value operand = getIfClauseOperand(converter, ifClause,
+        mlir::Value operand = getIfClauseOperand(converter, clause,
                                                  directiveName, clauseLocation);
         // Assume that, at most, a single 'if' clause will be applicable to the
         // given directive.
@@ -830,12 +777,11 @@ bool ClauseProcessor::processIf(
 
 bool ClauseProcessor::processLink(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
-  return findRepeatableClause<ClauseTy::Link>(
-      [&](const ClauseTy::Link *linkClause,
-          const Fortran::parser::CharBlock &) {
+  return findRepeatableClause<omp::clause::Link>(
+      [&](const omp::clause::Link &clause, const Fortran::parser::CharBlock &) {
         // Case: declare target link(var1, var2)...
         gatherFuncAndVarSyms(
-            linkClause->v, mlir::omp::DeclareTargetCaptureClause::link, result);
+            clause.v, mlir::omp::DeclareTargetCaptureClause::link, result);
       });
 }
 
@@ -872,7 +818,7 @@ bool ClauseProcessor::processMap(
     llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSymbols)
     const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  return findRepeatableClause<ClauseTy::Map>(
+  return findRepeatableClause2<ClauseTy::Map>(
       [&](const ClauseTy::Map *mapClause,
           const Fortran::parser::CharBlock &source) {
         mlir::Location clauseLocation = converter.genLocation(source);
@@ -964,43 +910,41 @@ bool ClauseProcessor::processReduction(
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
     llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *reductionSymbols)
     const {
-  return findRepeatableClause<ClauseTy::Reduction>(
-      [&](const ClauseTy::Reduction *reductionClause,
+  return findRepeatableClause<omp::clause::Reduction>(
+      [&](const omp::clause::Reduction &clause,
           const Fortran::parser::CharBlock &) {
         ReductionProcessor rp;
-        rp.addReductionDecl(currentLocation, converter, reductionClause->v,
-                            reductionVars, reductionDeclSymbols,
-                            reductionSymbols);
+        rp.addReductionDecl(currentLocation, converter, clause, reductionVars,
+                            reductionDeclSymbols, reductionSymbols);
       });
 }
 
 bool ClauseProcessor::processSectionsReduction(
     mlir::Location currentLocation) const {
-  return findRepeatableClause<ClauseTy::Reduction>(
-      [&](const ClauseTy::Reduction *, const Fortran::parser::CharBlock &) {
+  return findRepeatableClause<omp::clause::Reduction>(
+      [&](const omp::clause::Reduction &, const Fortran::parser::CharBlock &) {
         TODO(currentLocation, "OMPC_Reduction");
       });
 }
 
 bool ClauseProcessor::processTo(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
-  return findRepeatableClause<ClauseTy::To>(
-      [&](const ClauseTy::To *toClause, const Fortran::parser::CharBlock &) {
+  return findRepeatableClause<omp::clause::To>(
+      [&](const omp::clause::To &clause, const Fortran::parser::CharBlock &) {
         // Case: declare target to(func, var1, var2)...
-        gatherFuncAndVarSyms(toClause->v,
+        gatherFuncAndVarSyms(clause.v,
                              mlir::omp::DeclareTargetCaptureClause::to, result);
       });
 }
 
 bool ClauseProcessor::processEnter(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
-  return findRepeatableClause<ClauseTy::Enter>(
-      [&](const ClauseTy::Enter *enterClause,
+  return findRepeatableClause<omp::clause::Enter>(
+      [&](const omp::clause::Enter &clause,
           const Fortran::parser::CharBlock &) {
         // Case: declare target enter(func, var1, var2)...
-        gatherFuncAndVarSyms(enterClause->v,
-                             mlir::omp::DeclareTargetCaptureClause::enter,
-                             result);
+        gatherFuncAndVarSyms(
+            clause.v, mlir::omp::DeclareTargetCaptureClause::enter, result);
       });
 }
 
@@ -1010,11 +954,11 @@ bool ClauseProcessor::processUseDeviceAddr(
     llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
     llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSymbols)
     const {
-  return findRepeatableClause<ClauseTy::UseDeviceAddr>(
-      [&](const ClauseTy::UseDeviceAddr *devAddrClause,
+  return findRepeatableClause<omp::clause::UseDeviceAddr>(
+      [&](const omp::clause::UseDeviceAddr &clause,
           const Fortran::parser::CharBlock &) {
-        addUseDeviceClause(converter, devAddrClause->v, operands,
-                           useDeviceTypes, useDeviceLocs, useDeviceSymbols);
+        addUseDeviceClause(converter, clause.v, operands, useDeviceTypes,
+                           useDeviceLocs, useDeviceSymbols);
       });
 }
 
@@ -1024,10 +968,10 @@ bool ClauseProcessor::processUseDevicePtr(
     llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
     llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSymbols)
     const {
-  return findRepeatableClause<ClauseTy::UseDevicePtr>(
-      [&](const ClauseTy::UseDevicePtr *devPtrClause,
+  return findRepeatableClause<omp::clause::UseDevicePtr>(
+      [&](const omp::clause::UseDevicePtr &clause,
           const Fortran::parser::CharBlock &) {
-        addUseDeviceClause(converter, devPtrClause->v, operands, useDeviceTypes,
+        addUseDeviceClause(converter, clause.v, operands, useDeviceTypes,
                            useDeviceLocs, useDeviceSymbols);
       });
 }
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 11aff0b..3f6adcc 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -12,6 +12,7 @@
 #ifndef FORTRAN_LOWER_CLAUASEPROCESSOR_H
 #define FORTRAN_LOWER_CLAUASEPROCESSOR_H
 
+#include "Clauses.h"
 #include "DirectivesCommon.h"
 #include "ReductionProcessor.h"
 #include "Utils.h"
@@ -51,7 +52,8 @@ public:
   ClauseProcessor(Fortran::lower::AbstractConverter &converter,
                   Fortran::semantics::SemanticsContext &semaCtx,
                   const Fortran::parser::OmpClauseList &clauses)
-      : converter(converter), semaCtx(semaCtx), clauses(clauses) {}
+      : converter(converter), semaCtx(semaCtx), clauses2(clauses),
+        clauses(makeList(clauses, semaCtx)) {}
 
   // 'Unique' clauses: They can appear at most once in the clause list.
   bool
@@ -103,9 +105,8 @@ public:
                      llvm::SmallVectorImpl<mlir::Value> &dependOperands) const;
   bool
   processEnter(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
-  bool
-  processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName,
-            mlir::Value &result) const;
+  bool processIf(omp::clause::If::DirectiveNameModifier directiveName,
+                 mlir::Value &result) const;
   bool
   processLink(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
 
@@ -155,11 +156,15 @@ public:
                    llvm::omp::Directive directive) const;
 
 private:
-  using ClauseIterator = std::list<ClauseTy>::const_iterator;
+  using ClauseIterator = List<Clause>::const_iterator;
+  using ClauseIterator2 = std::list<ClauseTy>::const_iterator;
 
   /// Utility to find a clause within a range in the clause list.
   template <typename T>
   static ClauseIterator findClause(ClauseIterator begin, ClauseIterator end);
+  template <typename T>
+  static ClauseIterator2 findClause2(ClauseIterator2 begin,
+                                     ClauseIterator2 end);
 
   /// Return the first instance of the given clause found in the clause list or
   /// `nullptr` if not present. If more than one instance is expected, use
@@ -172,6 +177,10 @@ private:
   /// if at least one instance was found.
   template <typename T>
   bool findRepeatableClause(
+      std::function<void(const T &, const Fortran::parser::CharBlock &source)>
+          callbackFn) const;
+  template <typename T>
+  bool findRepeatableClause2(
       std::function<void(const T *, const Fortran::parser::CharBlock &source)>
           callbackFn) const;
 
@@ -181,14 +190,15 @@ private:
 
   Fortran::lower::AbstractConverter &converter;
   Fortran::semantics::SemanticsContext &semaCtx;
-  const Fortran::parser::OmpClauseList &clauses;
+  const Fortran::parser::OmpClauseList &clauses2;
+  List<Clause> clauses;
 };
 
 template <typename T>
 bool ClauseProcessor::processMotionClauses(
     Fortran::lower::StatementContext &stmtCtx,
     llvm::SmallVectorImpl<mlir::Value> &mapOperands) {
-  return findRepeatableClause<T>(
+  return findRepeatableClause2<T>(
       [&](const T *motionClause, const Fortran::parser::CharBlock &source) {
         mlir::Location clauseLocation = converter.genLocation(source);
         fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -248,7 +258,7 @@ void ClauseProcessor::processTODO(mlir::Location currentLocation,
              " construct");
   };
 
-  for (ClauseIterator it = clauses.v.begin(); it != clauses.v.end(); ++it)
+  for (ClauseIterator2 it = clauses2.v.begin(); it != clauses2.v.end(); ++it)
     (checkUnhandledClause(std::get_if<Ts>(&it->u)), ...);
 }
 
@@ -264,10 +274,21 @@ ClauseProcessor::findClause(ClauseIterator begin, ClauseIterator end) {
 }
 
 template <typename T>
+ClauseProcessor::ClauseIterator2
+ClauseProcessor::findClause2(ClauseIterator2 begin, ClauseIterator2 end) {
+  for (ClauseIterator2 it = begin; it != end; ++it) {
+    if (std::get_if<T>(&it->u))
+      return it;
+  }
+
+  return end;
+}
+
+template <typename T>
 const T *ClauseProcessor::findUniqueClause(
     const Fortran::parser::CharBlock **source) const {
-  ClauseIterator it = findClause<T>(clauses.v.begin(), clauses.v.end());
-  if (it != clauses.v.end()) {
+  ClauseIterator it = findClause<T>(clauses.begin(), clauses.end());
+  if (it != clauses.end()) {
     if (source)
       *source = &it->source;
     return &std::get<T>(it->u);
@@ -277,14 +298,32 @@ const T *ClauseProcessor::findUniqueClause(
 
 template <typename T>
 bool ClauseProcessor::findRepeatableClause(
-    std::function<void(const T *, const Fortran::parser::CharBlock &source)>
+    std::function<void(const T &, const Fortran::parser::CharBlock &source)>
         callbackFn) const {
   bool found = false;
-  ClauseIterator nextIt, endIt = clauses.v.end();
-  for (ClauseIterator it = clauses.v.begin(); it != endIt; it = nextIt) {
+  ClauseIterator nextIt, endIt = clauses.end();
+  for (ClauseIterator it = clauses.begin(); it != endIt; it = nextIt) {
     nextIt = findClause<T>(it, endIt);
 
     if (nextIt != endIt) {
+      callbackFn(std::get<T>(nextIt->u), nextIt->source);
+      found = true;
+      ++nextIt;
+    }
+  }
+  return found;
+}
+
+template <typename T>
+bool ClauseProcessor::findRepeatableClause2(
+    std::function<void(const T *, const Fortran::parser::CharBlock &source)>
+        callbackFn) const {
+  bool found = false;
+  ClauseIterator2 nextIt, endIt = clauses2.v.end();
+  for (ClauseIterator2 it = clauses2.v.begin(); it != endIt; it = nextIt) {
+    nextIt = findClause2<T>(it, endIt);
+
+    if (nextIt != endIt) {
       callbackFn(&std::get<T>(nextIt->u), nextIt->source);
       found = true;
       ++nextIt;
diff --git a/flang/lib/Lower/OpenMP/ClauseT.h b/flang/lib/Lower/OpenMP/ClauseT.h
new file mode 100644
index 0000000..2aae29a
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/ClauseT.h
@@ -0,0 +1,714 @@
+//===- ClauseT -- clause template definitions -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_LOWER_OPENMP_CLAUSET_H
+#define FORTRAN_LOWER_OPENMP_CLAUSET_H
+
+#include "flang/Parser/parse-tree.h" // For enum reuse
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <iterator>
+#include <optional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+#include "llvm/Frontend/OpenMP/OMP.h.inc"
+
+namespace tomp {
+
+template <typename T>
+using ListT = llvm::SmallVector<T, 0>;
+
+// A specialization of ObjectT<Id, Expr> must provide the following definitions:
+// {
+//    using IdType = Id;
+//    using ExprType = Expr;
+//
+//    auto id() const -> Id {
+//      return the identifier of the object (for use in tests for
+//         presence/absence of the object)
+//    }
+//
+//    auto ref() const -> const Expr& {
+//      return the expression accessing (referencing) the object
+//    }
+// }
+//
+// For example, the ObjectT instance created for "var[x+1]" would have
+// the `id()` return the identifier for `var`, and the `ref()` return the
+// representation of the array-access `var[x+1]`.
+template <typename Id, typename Expr>
+struct ObjectT;
+
+template <typename I, typename E>
+using ObjectListT = ListT<ObjectT<I, E>>;
+
+namespace clause {
+// Helper objects
+
+template <typename I, typename E>
+struct DefinedOperatorT {
+  struct DefinedOpName {
+    using WrapperTrait = std::true_type;
+    ObjectT<I, E> v;
+  };
+  using IntrinsicOperator = Fortran::parser::DefinedOperator::IntrinsicOperator;
+  using UnionTrait = std::true_type;
+  std::variant<DefinedOpName, IntrinsicOperator> u;
+};
+
+template <typename I, typename E>
+struct ProcedureDesignatorT {
+  using WrapperTrait = std::true_type;
+  ObjectT<I, E> v;
+};
+
+template <typename I, typename E>
+struct ReductionOperatorT {
+  using UnionTrait = std::true_type;
+  std::variant<DefinedOperatorT<I, E>, ProcedureDesignatorT<I, E>> u;
+};
+
+template <typename I, typename E>
+struct AcqRelT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct AcquireT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct AdjustArgsT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct AffinityT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct AlignT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct AppendArgsT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct AtT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct BindT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct CancellationConstructTypeT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct CaptureT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct CompareT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct DepobjT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct DestroyT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct DetachT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct DoacrossT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct DynamicAllocatorsT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct ExclusiveT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct FailT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct FlushT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct FullT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct InbranchT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct InclusiveT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct IndirectT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct InitT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct MatchT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct MemoryOrderT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct MergeableT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct MessageT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct NogroupT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct NotinbranchT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct NowaitT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct OmpxAttributeT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct OmpxBareT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct ReadT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct RelaxedT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct ReleaseT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct ReverseOffloadT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct SeqCstT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct SeverityT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct SimdT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct ThreadprivateT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct ThreadsT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UnifiedAddressT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UnifiedSharedMemoryT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UnknownT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UntiedT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UpdateT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UseT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct UsesAllocatorsT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct WeakT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct WhenT {
+  using EmptyTrait = std::true_type;
+};
+template <typename I, typename E>
+struct WriteT {
+  using EmptyTrait = std::true_type;
+};
+
+template <typename I, typename E>
+struct AlignedT {
+  using TupleTrait = std::true_type;
+  std::tuple<ObjectListT<I, E>, std::optional<E>> t;
+};
+
+template <typename I, typename E>
+struct AllocateT {
+  struct Modifier {
+    struct Allocator {
+      using WrapperTrait = std::true_type;
+      E v;
+    };
+    struct Align {
+      using WrapperTrait = std::true_type;
+      E v;
+    };
+    struct ComplexModifier {
+      using TupleTrait = std::true_type;
+      std::tuple<Allocator, Align> t;
+    };
+    using UnionTrait = std::true_type;
+    std::variant<Allocator, ComplexModifier, Align> u;
+  };
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<Modifier>, ObjectListT<I, E>> t;
+};
+
+template <typename I, typename E>
+struct AllocatorT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct AtomicDefaultMemOrderT {
+  using WrapperTrait = std::true_type;
+  using OmpAtomicDefaultMemOrderType =
+      Fortran::common::OmpAtomicDefaultMemOrderType;
+  OmpAtomicDefaultMemOrderType v;
+};
+
+template <typename I, typename E>
+struct CollapseT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct CopyinT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct CopyprivateT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct DefaultmapT {
+  using ImplicitBehavior =
+      Fortran::parser::OmpDefaultmapClause::ImplicitBehavior;
+  using VariableCategory =
+      Fortran::parser::OmpDefaultmapClause::VariableCategory;
+  using TupleTrait = std::true_type;
+  std::tuple<ImplicitBehavior, std::optional<VariableCategory>> t;
+};
+
+template <typename I, typename E>
+struct DefaultT {
+  using Type = Fortran::parser::OmpDefaultClause::Type;
+  using WrapperTrait = std::true_type;
+  Type v;
+};
+
+template <typename I, typename E>
+struct DependT {
+  struct Source {
+    using EmptyTrait = std::true_type;
+  };
+  struct Sink {
+    using Length = std::tuple<DefinedOperatorT<I, E>, E>;
+    using Vec = std::tuple<ObjectT<I, E>, std::optional<Length>>;
+    using WrapperTrait = std::true_type;
+    ListT<Vec> v;
+  };
+  using Type = Fortran::parser::OmpDependenceType::Type;
+  struct InOut {
+    using TupleTrait = std::true_type;
+    std::tuple<Type, ObjectListT<I, E>> t;
+  };
+  using UnionTrait = std::true_type;
+  std::variant<Source, Sink, InOut> u;
+};
+
+template <typename I, typename E>
+struct DeviceT {
+  using DeviceModifier = Fortran::parser::OmpDeviceClause::DeviceModifier;
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<DeviceModifier>, E> t;
+};
+
+template <typename I, typename E>
+struct DeviceTypeT {
+  using Type = Fortran::parser::OmpDeviceTypeClause::Type;
+  using WrapperTrait = std::true_type;
+  Type v;
+};
+
+template <typename I, typename E>
+struct DistScheduleT {
+  using WrapperTrait = std::true_type;
+  std::optional<E> v;
+};
+
+template <typename I, typename E>
+struct EnterT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct FilterT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct FinalT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct FirstprivateT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct FromT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct GrainsizeT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct HasDeviceAddrT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct HintT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct IfT {
+  using DirectiveNameModifier =
+      Fortran::parser::OmpIfClause::DirectiveNameModifier;
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<DirectiveNameModifier>, E> t;
+};
+
+template <typename I, typename E>
+struct InReductionT {
+  using TupleTrait = std::true_type;
+  std::tuple<ReductionOperatorT<I, E>, ObjectListT<I, E>> t;
+};
+
+template <typename I, typename E>
+struct IsDevicePtrT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct LastprivateT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct LinearT {
+  struct Modifier {
+    using Type = Fortran::parser::OmpLinearModifier::Type;
+    using WrapperTrait = std::true_type;
+    Type v;
+  };
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<Modifier>, ObjectListT<I, E>, std::optional<E>> t;
+};
+
+template <typename I, typename E>
+struct LinkT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct MapT {
+  struct MapType {
+    struct Always {
+      using EmptyTrait = std::true_type;
+    };
+    using Type = Fortran::parser::OmpMapType::Type;
+    using TupleTrait = std::true_type;
+    std::tuple<std::optional<Always>, Type> t;
+  };
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<MapType>, ObjectListT<I, E>> t;
+};
+
+template <typename I, typename E>
+struct NocontextT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct NontemporalT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct NovariantsT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct NumTasksT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct NumTeamsT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct NumThreadsT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct OmpxDynCgroupMemT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct OrderedT {
+  using WrapperTrait = std::true_type;
+  std::optional<E> v;
+};
+
+template <typename I, typename E>
+struct OrderT {
+  using Kind = Fortran::parser::OmpOrderModifier::Kind;
+  using Type = Fortran::parser::OmpOrderClause::Type;
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<Kind>, Type> t;
+};
+
+template <typename I, typename E>
+struct PartialT {
+  using WrapperTrait = std::true_type;
+  std::optional<E> v;
+};
+
+template <typename I, typename E>
+struct PriorityT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct PrivateT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct ProcBindT {
+  using Type = Fortran::parser::OmpProcBindClause::Type;
+  using WrapperTrait = std::true_type;
+  Type v;
+};
+
+template <typename I, typename E>
+struct ReductionT {
+  using TupleTrait = std::true_type;
+  std::tuple<ReductionOperatorT<I, E>, ObjectListT<I, E>> t;
+};
+
+template <typename I, typename E>
+struct SafelenT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct ScheduleT {
+  using ModType = Fortran::parser::OmpScheduleModifierType::ModType;
+  struct ScheduleModifier {
+    using TupleTrait = std::true_type;
+    std::tuple<ModType, std::optional<ModType>> t;
+  };
+  using ScheduleType = Fortran::parser::OmpScheduleClause::ScheduleType;
+  using TupleTrait = std::true_type;
+  std::tuple<std::optional<ScheduleModifier>, ScheduleType, std::optional<E>> t;
+};
+
+template <typename I, typename E>
+struct SharedT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct SimdlenT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct SizesT {
+  using WrapperTrait = std::true_type;
+  ListT<E> v;
+};
+
+template <typename I, typename E>
+struct TaskReductionT {
+  using TupleTrait = std::true_type;
+  std::tuple<ReductionOperatorT<I, E>, ObjectListT<I, E>> t;
+};
+
+template <typename I, typename E>
+struct ThreadLimitT {
+  using WrapperTrait = std::true_type;
+  E v;
+};
+
+template <typename I, typename E>
+struct ToT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct UniformT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct UseDeviceAddrT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+struct UseDevicePtrT {
+  using WrapperTrait = std::true_type;
+  ObjectListT<I, E> v;
+};
+
+template <typename I, typename E>
+using UnionOfAllClausesT = std::variant<
+    AcqRelT<I, E>, AcquireT<I, E>, AdjustArgsT<I, E>, AffinityT<I, E>,
+    AlignT<I, E>, AlignedT<I, E>, AllocateT<I, E>, AllocatorT<I, E>,
+    AppendArgsT<I, E>, AtT<I, E>, AtomicDefaultMemOrderT<I, E>, BindT<I, E>,
+    CancellationConstructTypeT<I, E>, CaptureT<I, E>, CollapseT<I, E>,
+    CompareT<I, E>, CopyprivateT<I, E>, CopyinT<I, E>, DefaultT<I, E>,
+    DefaultmapT<I, E>, DependT<I, E>, DepobjT<I, E>, DestroyT<I, E>,
+    DetachT<I, E>, DeviceT<I, E>, DeviceTypeT<I, E>, DistScheduleT<I, E>,
+    DoacrossT<I, E>, DynamicAllocatorsT<I, E>, EnterT<I, E>, ExclusiveT<I, E>,
+    FailT<I, E>, FilterT<I, E>, FinalT<I, E>, FirstprivateT<I, E>, FlushT<I, E>,
+    FromT<I, E>, FullT<I, E>, GrainsizeT<I, E>, HasDeviceAddrT<I, E>,
+    HintT<I, E>, IfT<I, E>, InReductionT<I, E>, InbranchT<I, E>,
+    InclusiveT<I, E>, IndirectT<I, E>, InitT<I, E>, IsDevicePtrT<I, E>,
+    LastprivateT<I, E>, LinearT<I, E>, LinkT<I, E>, MapT<I, E>, MatchT<I, E>,
+    MemoryOrderT<I, E>, MergeableT<I, E>, MessageT<I, E>, NogroupT<I, E>,
+    NowaitT<I, E>, NocontextT<I, E>, NontemporalT<I, E>, NotinbranchT<I, E>,
+    NovariantsT<I, E>, NumTasksT<I, E>, NumTeamsT<I, E>, NumThreadsT<I, E>,
+    OmpxAttributeT<I, E>, OmpxDynCgroupMemT<I, E>, OmpxBareT<I, E>,
+    OrderT<I, E>, OrderedT<I, E>, PartialT<I, E>, PriorityT<I, E>,
+    PrivateT<I, E>, ProcBindT<I, E>, ReadT<I, E>, ReductionT<I, E>,
+    RelaxedT<I, E>, ReleaseT<I, E>, ReverseOffloadT<I, E>, SafelenT<I, E>,
+    ScheduleT<I, E>, SeqCstT<I, E>, SeverityT<I, E>, SharedT<I, E>, SimdT<I, E>,
+    SimdlenT<I, E>, SizesT<I, E>, TaskReductionT<I, E>, ThreadLimitT<I, E>,
+    ThreadprivateT<I, E>, ThreadsT<I, E>, ToT<I, E>, UnifiedAddressT<I, E>,
+    UnifiedSharedMemoryT<I, E>, UniformT<I, E>, UnknownT<I, E>, UntiedT<I, E>,
+    UpdateT<I, E>, UseT<I, E>, UseDeviceAddrT<I, E>, UseDevicePtrT<I, E>,
+    UsesAllocatorsT<I, E>, WeakT<I, E>, WhenT<I, E>, WriteT<I, E>>;
+} // namespace clause
+
+template <typename Id, typename Expr>
+struct ClauseT {
+  llvm::omp::Clause id; // The numeric id of the clause
+  using UnionTrait = std::true_type;
+  clause::UnionOfAllClausesT<Id, Expr> u;
+};
+
+} // namespace tomp
+
+#endif // FORTRAN_LOWER_OPENMP_CLAUSET_H
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
new file mode 100644
index 0000000..70f232a
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -0,0 +1,728 @@
+//===-- Clauses.cpp -- OpenMP clause handling -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Clauses.h"
+
+#include "flang/Common/idioms.h"
+#include "flang/Evaluate/expression.h"
+#include "flang/Parser/parse-tree.h"
+#include "flang/Semantics/expression.h"
+#include "flang/Semantics/symbol.h"
+
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+
+#include <list>
+#include <optional>
+#include <tuple>
+#include <utility>
+#include <variant>
+
+namespace detail {
+template <typename C>
+llvm::omp::Clause getClauseIdForClass(C &&) {
+  using namespace Fortran;
+  using A = llvm::remove_cvref_t<C>; // A is referenced in OMP.inc
+  // The code included below contains a sequence of checks like the following
+  // for each OpenMP clause
+  //   if constexpr (std::is_same_v<A, parser::OmpClause::AcqRel>)
+  //     return llvm::omp::Clause::OMPC_acq_rel;
+  //   [...]
+#define GEN_FLANG_CLAUSE_PARSER_KIND_MAP
+#include "llvm/Frontend/OpenMP/OMP.inc"
+}
+} // namespace detail
+
+static llvm::omp::Clause getClauseId(const Fortran::parser::OmpClause &clause) {
+  return std::visit([](auto &&s) { return detail::getClauseIdForClass(s); },
+                    clause.u);
+}
+
+namespace Fortran::lower::omp {
+using SymbolWithDesignator = std::tuple<semantics::Symbol *, MaybeExpr>;
+
+struct SymbolAndDesignatorExtractor {
+  template <typename T>
+  static T &&AsRvalueRef(T &&t) {
+    return std::move(t);
+  }
+  template <typename T>
+  static T AsRvalueRef(const T &t) {
+    return t;
+  }
+
+  static semantics::Symbol *symbol_addr(const evaluate::SymbolRef &ref) {
+    // Symbols cannot be created after semantic checks, so all symbol
+    // pointers that are non-null must point to one of those pre-existing
+    // objects. Throughout the code, symbols are often pointed to by
+    // non-const pointers, so there is no harm in casting the constness
+    // away.
+    return const_cast<semantics::Symbol *>(&ref.get());
+  }
+
+  template <typename T>
+  static SymbolWithDesignator visit(T &&) {
+    // Use this to see missing overloads:
+    // llvm::errs() << "NULL: " << __PRETTY_FUNCTION__ << '\n';
+    return SymbolWithDesignator{};
+  }
+
+  template <typename T>
+  static SymbolWithDesignator visit(const evaluate::Designator<T> &e) {
+    return std::make_tuple(symbol_addr(*e.GetLastSymbol()),
+                           evaluate::AsGenericExpr(AsRvalueRef(e)));
+  }
+
+  static SymbolWithDesignator visit(const evaluate::ProcedureDesignator &e) {
+    return std::make_tuple(symbol_addr(*e.GetSymbol()), std::nullopt);
+  }
+
+  template <typename T>
+  static SymbolWithDesignator visit(const evaluate::Expr<T> &e) {
+    return std::visit([](auto &&s) { return visit(s); }, e.u);
+  }
+
+  static void verify(const SymbolWithDesignator &sd) {
+    const semantics::Symbol *symbol = std::get<0>(sd);
+    assert(symbol && "Expecting symbol");
+    auto &maybeDsg = std::get<1>(sd);
+    if (!maybeDsg)
+      return; // Symbol with no designator -> OK
+    std::optional<evaluate::DataRef> maybeRef =
+        evaluate::ExtractDataRef(*maybeDsg);
+    if (maybeRef) {
+      if (&maybeRef->GetLastSymbol() == symbol)
+        return; // Symbol with a designator for it -> OK
+      llvm_unreachable("Expecting designator for given symbol");
+    } else {
+      // This could still be a Substring or ComplexPart, but at least Substring
+      // is not allowed in OpenMP.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+      maybeDsg->dump();
+#endif
+      llvm_unreachable("Expecting DataRef designator");
+    }
+  }
+};
+
+SymbolWithDesignator getSymbolAndDesignator(const MaybeExpr &expr) {
+  if (!expr)
+    return SymbolWithDesignator{};
+  return std::visit(
+      [](auto &&s) { return SymbolAndDesignatorExtractor::visit(s); }, expr->u);
+}
+
+Object makeObject(const parser::Name &name,
+                  semantics::SemanticsContext &semaCtx) {
+  assert(name.symbol && "Expecting Symbol");
+  return Object{name.symbol, std::nullopt};
+}
+
+Object makeObject(const parser::Designator &dsg,
+                  semantics::SemanticsContext &semaCtx) {
+  evaluate::ExpressionAnalyzer ea{semaCtx};
+  SymbolWithDesignator sd = getSymbolAndDesignator(ea.Analyze(dsg));
+  SymbolAndDesignatorExtractor::verify(sd);
+  return Object{std::get<0>(sd), std::move(std::get<1>(sd))};
+}
+
+Object makeObject(const parser::StructureComponent &comp,
+                  semantics::SemanticsContext &semaCtx) {
+  evaluate::ExpressionAnalyzer ea{semaCtx};
+  SymbolWithDesignator sd = getSymbolAndDesignator(ea.Analyze(comp));
+  SymbolAndDesignatorExtractor::verify(sd);
+  return Object{std::get<0>(sd), std::move(std::get<1>(sd))};
+}
+
+Object makeObject(const parser::OmpObject &object,
+                  semantics::SemanticsContext &semaCtx) {
+  // If object is a common block, expression analyzer won't be able to
+  // do anything.
+  if (const auto *name = std::get_if<parser::Name>(&object.u)) {
+    assert(name->symbol && "Expecting Symbol");
+    return Object{name->symbol, std::nullopt};
+  }
+  // OmpObject is std::variant<Designator, /*common block*/ Name>;
+  return makeObject(std::get<parser::Designator>(object.u), semaCtx);
+}
+
+std::optional<Object>
+getBaseObject(const Object &object,
+              Fortran::semantics::SemanticsContext &semaCtx) {
+  // If it's just the symbol, then there is no base.
+  if (!object.id())
+    return std::nullopt;
+
+  auto maybeRef = evaluate::ExtractDataRef(*object.ref());
+  if (!maybeRef)
+    return std::nullopt;
+
+  evaluate::DataRef ref = *maybeRef;
+
+  if (std::get_if<evaluate::SymbolRef>(&ref.u)) {
+    return std::nullopt;
+  } else if (auto *comp = std::get_if<evaluate::Component>(&ref.u)) {
+    const evaluate::DataRef &base = comp->base();
+    return Object{
+        SymbolAndDesignatorExtractor::symbol_addr(base.GetLastSymbol()),
+        evaluate::AsGenericExpr(
+            SymbolAndDesignatorExtractor::AsRvalueRef(base))};
+  } else if (auto *arr = std::get_if<evaluate::ArrayRef>(&ref.u)) {
+    const evaluate::NamedEntity &base = arr->base();
+    evaluate::ExpressionAnalyzer ea{semaCtx};
+    if (auto *comp = base.UnwrapComponent()) {
+      return Object{SymbolAndDesignatorExtractor::symbol_addr(comp->symbol()),
+                    ea.Designate(evaluate::DataRef{
+                        SymbolAndDesignatorExtractor::AsRvalueRef(*comp)})};
+    } else if (base.UnwrapSymbolRef()) {
+      return std::nullopt;
+    }
+  } else {
+    assert(std::holds_alternative<evaluate::CoarrayRef>(ref.u) &&
+           "Unexpected variant alternative");
+    llvm_unreachable("Coarray reference not supported at the moment");
+  }
+  return std::nullopt;
+}
+
+namespace clause {
+// Helper objects
+#ifdef EMPTY_CLASS
+#undef EMPTY_CLASS
+#endif
+#define EMPTY_CLASS(cls)                                                       \
+  cls make(const parser::OmpClause::cls &, semantics::SemanticsContext &) {    \
+    return cls{};                                                              \
+  }                                                                            \
+  [[maybe_unused]] extern int xyzzy_semicolon_absorber
+
+#ifdef WRAPPER_CLASS
+#undef WRAPPER_CLASS
+#endif
+#define WRAPPER_CLASS(cls, content)                                            \
+  [[maybe_unused]] extern int xyzzy_semicolon_absorber
+#define GEN_FLANG_CLAUSE_PARSER_CLASSES
+#include "llvm/Frontend/OpenMP/OMP.inc"
+#undef EMPTY_CLASS
+#undef WRAPPER_CLASS
+
+DefinedOperator makeDefinedOperator(const parser::DefinedOperator &inp,
+                                    semantics::SemanticsContext &semaCtx) {
+  return std::visit(
+      common::visitors{
+          [&](const parser::DefinedOpName &s) {
+            return DefinedOperator{
+                DefinedOperator::DefinedOpName{makeObject(s.v, semaCtx)}};
+          },
+          [&](const parser::DefinedOperator::IntrinsicOperator &s) {
+            return DefinedOperator{s};
+          },
+      },
+      inp.u);
+}
+
+ProcedureDesignator
+makeProcedureDesignator(const parser::ProcedureDesignator &inp,
+                        semantics::SemanticsContext &semaCtx) {
+  return ProcedureDesignator{std::visit(
+      common::visitors{
+          [&](const parser::Name &t) { return makeObject(t, semaCtx); },
+          [&](const parser::ProcComponentRef &t) {
+            return makeObject(t.v.thing, semaCtx);
+          },
+      },
+      inp.u)};
+}
+
+ReductionOperator makeReductionOperator(const parser::OmpReductionOperator &inp,
+                                        semantics::SemanticsContext &semaCtx) {
+  return std::visit(
+      common::visitors{
+          [&](const parser::DefinedOperator &s) {
+            return ReductionOperator{makeDefinedOperator(s, semaCtx)};
+          },
+          [&](const parser::ProcedureDesignator &s) {
+            return ReductionOperator{makeProcedureDesignator(s, semaCtx)};
+          },
+      },
+      inp.u);
+}
+
+// Actual clauses. Each T (where OmpClause::T exists) has its "make".
+Aligned make(const parser::OmpClause::Aligned &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpAlignedClause
+  auto &t0 = std::get<parser::OmpObjectList>(inp.v.t);
+  auto &t1 = std::get<std::optional<parser::ScalarIntConstantExpr>>(inp.v.t);
+
+  return Aligned{{
+      makeList(t0, semaCtx),
+      maybeApply(makeExprFn(semaCtx), t1),
+  }};
+}
+
+Allocate make(const parser::OmpClause::Allocate &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpAllocateClause
+  using wrapped = parser::OmpAllocateClause;
+  auto &t0 = std::get<std::optional<wrapped::AllocateModifier>>(inp.v.t);
+  auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
+
+  auto convert = [&](auto &&s) -> Allocate::Modifier {
+    using Modifier = Allocate::Modifier;
+    using Allocator = Modifier::Allocator;
+    using Align = Modifier::Align;
+    using ComplexModifier = Modifier::ComplexModifier;
+
+    return std::visit(
+        common::visitors{
+            [&](const wrapped::AllocateModifier::Allocator &v) {
+              return Modifier{Allocator{makeExpr(v.v, semaCtx)}};
+            },
+            [&](const wrapped::AllocateModifier::ComplexModifier &v) {
+              auto &s0 = std::get<wrapped::AllocateModifier::Allocator>(v.t);
+              auto &s1 = std::get<wrapped::AllocateModifier::Align>(v.t);
+              return Modifier{ComplexModifier{{
+                  Allocator{makeExpr(s0.v, semaCtx)},
+                  Align{makeExpr(s1.v, semaCtx)},
+              }}};
+            },
+            [&](const wrapped::AllocateModifier::Align &v) {
+              return Modifier{Align{makeExpr(v.v, semaCtx)}};
+            },
+        },
+        s.u);
+  };
+
+  return Allocate{{maybeApply(convert, t0), makeList(t1, semaCtx)}};
+}
+
+Allocator make(const parser::OmpClause::Allocator &inp,
+               semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return Allocator{makeExpr(inp.v, semaCtx)};
+}
+
+// Never called, but needed for using "make" as a Clause visitor.
+// See comment about "requires" clauses in Clauses.h.
+AtomicDefaultMemOrder make(const parser::OmpClause::AtomicDefaultMemOrder &inp,
+                           semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpAtomicDefaultMemOrderClause
+  return AtomicDefaultMemOrder{inp.v.v};
+}
+
+Collapse make(const parser::OmpClause::Collapse &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntConstantExpr
+  return Collapse{makeExpr(inp.v, semaCtx)};
+}
+
+Copyin make(const parser::OmpClause::Copyin &inp,
+            semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Copyin{makeList(inp.v, semaCtx)};
+}
+
+Copyprivate make(const parser::OmpClause::Copyprivate &inp,
+                 semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Copyprivate{makeList(inp.v, semaCtx)};
+}
+
+Defaultmap make(const parser::OmpClause::Defaultmap &inp,
+                semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpDefaultmapClause
+  using wrapped = parser::OmpDefaultmapClause;
+
+  auto &t0 = std::get<wrapped::ImplicitBehavior>(inp.v.t);
+  auto &t1 = std::get<std::optional<wrapped::VariableCategory>>(inp.v.t);
+  return Defaultmap{{t0, t1}};
+}
+
+Default make(const parser::OmpClause::Default &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpDefaultClause
+  return Default{inp.v.v};
+}
+
+Depend make(const parser::OmpClause::Depend &inp,
+            semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpDependClause
+  using wrapped = parser::OmpDependClause;
+
+  return std::visit(
+      common::visitors{
+          [&](const wrapped::Source &s) { return Depend{Depend::Source{}}; },
+          [&](const wrapped::Sink &s) {
+            auto convert = [&](const parser::OmpDependSinkVec &v) {
+              auto &t0 = std::get<parser::Name>(v.t);
+              auto &t1 =
+                  std::get<std::optional<parser::OmpDependSinkVecLength>>(v.t);
+              auto convert1 = [&](const parser::OmpDependSinkVecLength &u) {
+                auto &s0 = std::get<parser::DefinedOperator>(u.t);
+                auto &s1 = std::get<parser::ScalarIntConstantExpr>(u.t);
+                return Depend::Sink::Length{makeDefinedOperator(s0, semaCtx),
+                                            makeExpr(s1, semaCtx)};
+              };
+              return Depend::Sink::Vec{makeObject(t0, semaCtx),
+                                       maybeApply(convert1, t1)};
+            };
+            return Depend{Depend::Sink{makeList(s.v, convert)}};
+          },
+          [&](const wrapped::InOut &s) {
+            auto &t0 = std::get<parser::OmpDependenceType>(s.t);
+            auto &t1 = std::get<std::list<parser::Designator>>(s.t);
+            auto convert = [&](const parser::Designator &t) {
+              return makeObject(t, semaCtx);
+            };
+            return Depend{Depend::InOut{{t0.v, makeList(t1, convert)}}};
+          },
+      },
+      inp.v.u);
+}
+
+Device make(const parser::OmpClause::Device &inp,
+            semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpDeviceClause
+  using wrapped = parser::OmpDeviceClause;
+
+  auto &t0 = std::get<std::optional<wrapped::DeviceModifier>>(inp.v.t);
+  auto &t1 = std::get<parser::ScalarIntExpr>(inp.v.t);
+  return Device{{t0, makeExpr(t1, semaCtx)}};
+}
+
+DeviceType make(const parser::OmpClause::DeviceType &inp,
+                semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpDeviceTypeClause
+  return DeviceType{inp.v.v};
+}
+
+DistSchedule make(const parser::OmpClause::DistSchedule &inp,
+                  semantics::SemanticsContext &semaCtx) {
+  // inp.v -> std::optional<parser::ScalarIntExpr>
+  return DistSchedule{maybeApply(makeExprFn(semaCtx), inp.v)};
+}
+
+Enter make(const parser::OmpClause::Enter &inp,
+           semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Enter{makeList(inp.v, semaCtx)};
+}
+
+Filter make(const parser::OmpClause::Filter &inp,
+            semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return Filter{makeExpr(inp.v, semaCtx)};
+}
+
+Final make(const parser::OmpClause::Final &inp,
+           semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarLogicalExpr
+  return Final{makeExpr(inp.v, semaCtx)};
+}
+
+Firstprivate make(const parser::OmpClause::Firstprivate &inp,
+                  semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Firstprivate{makeList(inp.v, semaCtx)};
+}
+
+From make(const parser::OmpClause::From &inp,
+          semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return From{makeList(inp.v, semaCtx)};
+}
+
+Grainsize make(const parser::OmpClause::Grainsize &inp,
+               semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return Grainsize{makeExpr(inp.v, semaCtx)};
+}
+
+HasDeviceAddr make(const parser::OmpClause::HasDeviceAddr &inp,
+                   semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return HasDeviceAddr{makeList(inp.v, semaCtx)};
+}
+
+Hint make(const parser::OmpClause::Hint &inp,
+          semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ConstantExpr
+  return Hint{makeExpr(inp.v, semaCtx)};
+}
+
+If make(const parser::OmpClause::If &inp,
+        semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpIfClause
+  using wrapped = parser::OmpIfClause;
+
+  auto &t0 = std::get<std::optional<wrapped::DirectiveNameModifier>>(inp.v.t);
+  auto &t1 = std::get<parser::ScalarLogicalExpr>(inp.v.t);
+  return If{{t0, makeExpr(t1, semaCtx)}};
+}
+
+InReduction make(const parser::OmpClause::InReduction &inp,
+                 semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpInReductionClause
+  auto &t0 = std::get<parser::OmpReductionOperator>(inp.v.t);
+  auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
+  return InReduction{
+      {makeReductionOperator(t0, semaCtx), makeList(t1, semaCtx)}};
+}
+
+IsDevicePtr make(const parser::OmpClause::IsDevicePtr &inp,
+                 semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return IsDevicePtr{makeList(inp.v, semaCtx)};
+}
+
+Lastprivate make(const parser::OmpClause::Lastprivate &inp,
+                 semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Lastprivate{makeList(inp.v, semaCtx)};
+}
+
+Linear make(const parser::OmpClause::Linear &inp,
+            semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpLinearClause
+  using wrapped = parser::OmpLinearClause;
+
+  return std::visit(
+      common::visitors{
+          [&](const wrapped::WithModifier &s) {
+            return Linear{{Linear::Modifier{s.modifier.v},
+                           makeList(s.names, makeObjectFn(semaCtx)),
+                           maybeApply(makeExprFn(semaCtx), s.step)}};
+          },
+          [&](const wrapped::WithoutModifier &s) {
+            return Linear{{std::nullopt,
+                           makeList(s.names, makeObjectFn(semaCtx)),
+                           maybeApply(makeExprFn(semaCtx), s.step)}};
+          },
+      },
+      inp.v.u);
+}
+
+Link make(const parser::OmpClause::Link &inp,
+          semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Link{makeList(inp.v, semaCtx)};
+}
+
+Map make(const parser::OmpClause::Map &inp,
+         semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpMapClause
+  auto &t0 = std::get<std::optional<parser::OmpMapType>>(inp.v.t);
+  auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
+  auto convert = [](const parser::OmpMapType &s) {
+    auto &s0 = std::get<std::optional<parser::OmpMapType::Always>>(s.t);
+    auto &s1 = std::get<parser::OmpMapType::Type>(s.t);
+    auto convertT = [](parser::OmpMapType::Always) {
+      return Map::MapType::Always{};
+    };
+    return Map::MapType{{maybeApply(convertT, s0), s1}};
+  };
+  return Map{{maybeApply(convert, t0), makeList(t1, semaCtx)}};
+}
+
+Nocontext make(const parser::OmpClause::Nocontext &inp,
+               semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarLogicalExpr
+  return Nocontext{makeExpr(inp.v, semaCtx)};
+}
+
+Nontemporal make(const parser::OmpClause::Nontemporal &inp,
+                 semantics::SemanticsContext &semaCtx) {
+  // inp.v -> std::list<parser::Name>
+  return Nontemporal{makeList(inp.v, makeObjectFn(semaCtx))};
+}
+
+Novariants make(const parser::OmpClause::Novariants &inp,
+                semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarLogicalExpr
+  return Novariants{makeExpr(inp.v, semaCtx)};
+}
+
+NumTasks make(const parser::OmpClause::NumTasks &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return NumTasks{makeExpr(inp.v, semaCtx)};
+}
+
+NumTeams make(const parser::OmpClause::NumTeams &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return NumTeams{makeExpr(inp.v, semaCtx)};
+}
+
+NumThreads make(const parser::OmpClause::NumThreads &inp,
+                semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return NumThreads{makeExpr(inp.v, semaCtx)};
+}
+
+OmpxDynCgroupMem make(const parser::OmpClause::OmpxDynCgroupMem &inp,
+                      semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return OmpxDynCgroupMem{makeExpr(inp.v, semaCtx)};
+}
+
+Ordered make(const parser::OmpClause::Ordered &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> std::optional<parser::ScalarIntConstantExpr>
+  return Ordered{maybeApply(makeExprFn(semaCtx), inp.v)};
+}
+
+Order make(const parser::OmpClause::Order &inp,
+           semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpOrderClause
+  using wrapped = parser::OmpOrderClause;
+  auto &t0 = std::get<std::optional<parser::OmpOrderModifier>>(inp.v.t);
+  auto &t1 = std::get<wrapped::Type>(inp.v.t);
+  auto convert = [](const parser::OmpOrderModifier &s) -> Order::Kind {
+    return std::get<parser::OmpOrderModifier::Kind>(s.u);
+  };
+  return Order{{maybeApply(convert, t0), t1}};
+}
+
+Partial make(const parser::OmpClause::Partial &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> std::optional<parser::ScalarIntConstantExpr>
+  return Partial{maybeApply(makeExprFn(semaCtx), inp.v)};
+}
+
+Priority make(const parser::OmpClause::Priority &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return Priority{makeExpr(inp.v, semaCtx)};
+}
+
+Private make(const parser::OmpClause::Private &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Private{makeList(inp.v, semaCtx)};
+}
+
+ProcBind make(const parser::OmpClause::ProcBind &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpProcBindClause
+  return ProcBind{inp.v.v};
+}
+
+Reduction make(const parser::OmpClause::Reduction &inp,
+               semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpReductionClause
+  auto &t0 = std::get<parser::OmpReductionOperator>(inp.v.t);
+  auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
+  return Reduction{{makeReductionOperator(t0, semaCtx), makeList(t1, semaCtx)}};
+}
+
+Safelen make(const parser::OmpClause::Safelen &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntConstantExpr
+  return Safelen{makeExpr(inp.v, semaCtx)};
+}
+
+Schedule make(const parser::OmpClause::Schedule &inp,
+              semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpScheduleClause
+  using wrapped = parser::OmpScheduleClause;
+
+  auto &t0 = std::get<std::optional<parser::OmpScheduleModifier>>(inp.v.t);
+  auto &t1 = std::get<wrapped::ScheduleType>(inp.v.t);
+  auto &t2 = std::get<std::optional<parser::ScalarIntExpr>>(inp.v.t);
+
+  auto convert = [](auto &&s) -> Schedule::ScheduleModifier {
+    auto &s0 = std::get<parser::OmpScheduleModifier::Modifier1>(s.t);
+    auto &s1 =
+        std::get<std::optional<parser::OmpScheduleModifier::Modifier2>>(s.t);
+
+    auto convert1 = [](auto &&v) { // Modifier1 or Modifier2
+      return v.v.v;
+    };
+    return Schedule::ScheduleModifier{{s0.v.v, maybeApply(convert1, s1)}};
+  };
+
+  return Schedule{
+      {maybeApply(convert, t0), t1, maybeApply(makeExprFn(semaCtx), t2)}};
+}
+
+Shared make(const parser::OmpClause::Shared &inp,
+            semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return Shared{makeList(inp.v, semaCtx)};
+}
+
+Simdlen make(const parser::OmpClause::Simdlen &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntConstantExpr
+  return Simdlen{makeExpr(inp.v, semaCtx)};
+}
+
+Sizes make(const parser::OmpClause::Sizes &inp,
+           semantics::SemanticsContext &semaCtx) {
+  // inp.v -> std::list<parser::ScalarIntExpr>
+  return Sizes{makeList(inp.v, makeExprFn(semaCtx))};
+}
+
+TaskReduction make(const parser::OmpClause::TaskReduction &inp,
+                   semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpReductionClause
+  auto &t0 = std::get<parser::OmpReductionOperator>(inp.v.t);
+  auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
+  return TaskReduction{
+      {makeReductionOperator(t0, semaCtx), makeList(t1, semaCtx)}};
+}
+
+ThreadLimit make(const parser::OmpClause::ThreadLimit &inp,
+                 semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::ScalarIntExpr
+  return ThreadLimit{makeExpr(inp.v, semaCtx)};
+}
+
+To make(const parser::OmpClause::To &inp,
+        semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return To{makeList(inp.v, semaCtx)};
+}
+
+Uniform make(const parser::OmpClause::Uniform &inp,
+             semantics::SemanticsContext &semaCtx) {
+  // inp.v -> std::list<parser::Name>
+  return Uniform{makeList(inp.v, makeObjectFn(semaCtx))};
+}
+
+UseDeviceAddr make(const parser::OmpClause::UseDeviceAddr &inp,
+                   semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return UseDeviceAddr{makeList(inp.v, semaCtx)};
+}
+
+UseDevicePtr make(const parser::OmpClause::UseDevicePtr &inp,
+                  semantics::SemanticsContext &semaCtx) {
+  // inp.v -> parser::OmpObjectList
+  return UseDevicePtr{makeList(inp.v, semaCtx)};
+}
+} // namespace clause
+
+Clause makeClause(const Fortran::parser::OmpClause &cls,
+                  semantics::SemanticsContext &semaCtx) {
+  return std::visit(
+      [&](auto &&s) {
+        return makeClause(getClauseId(cls), clause::make(s, semaCtx),
+                          cls.source);
+      },
+      cls.u);
+}
+
+List<Clause> makeList(const parser::OmpClauseList &clauses,
+                      semantics::SemanticsContext &semaCtx) {
+  return makeList(clauses.v, [&](const parser::OmpClause &s) {
+    return makeClause(s, semaCtx);
+  });
+}
+} // namespace Fortran::lower::omp
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
new file mode 100644
index 0000000..1d1a112
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -0,0 +1,212 @@
+//===-- Clauses.h -- OpenMP clause handling -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_LOWER_OPENMP_CLAUSES_H
+#define FORTRAN_LOWER_OPENMP_CLAUSES_H
+
+#include "ClauseT.h"
+
+#include "flang/Evaluate/expression.h"
+#include "flang/Parser/parse-tree.h"
+#include "flang/Semantics/expression.h"
+#include "flang/Semantics/semantics.h"
+#include "flang/Semantics/symbol.h"
+
+#include "llvm/ADT/STLExtras.h"
+
+#include <optional>
+#include <type_traits>
+#include <utility>
+
+namespace Fortran::lower::omp {
+using namespace Fortran;
+using SomeType = evaluate::SomeType;
+using SomeExpr = semantics::SomeExpr;
+using MaybeExpr = semantics::MaybeExpr;
+
+using SymIdent = semantics::Symbol *;
+using SymReference = SomeExpr;
+
+template <typename T>
+using List = tomp::ListT<T>;
+} // namespace Fortran::lower::omp
+
+namespace tomp {
+template <>
+struct ObjectT<Fortran::lower::omp::SymIdent,
+               Fortran::lower::omp::SymReference> {
+  using IdType = Fortran::lower::omp::SymIdent;
+  using ExprType = Fortran::lower::omp::SymReference;
+
+  const IdType &id() const { return symbol; }
+  const std::optional<ExprType> &ref() const { return designator; }
+
+  IdType symbol;
+  std::optional<ExprType> designator;
+};
+} // namespace tomp
+
+namespace Fortran::lower::omp {
+
+using Object = tomp::ObjectT<SymIdent, SymReference>;
+using ObjectList = tomp::ObjectListT<SymIdent, SymReference>;
+
+Object makeObject(const parser::OmpObject &object,
+                  semantics::SemanticsContext &semaCtx);
+Object makeObject(const parser::Name &name,
+                  semantics::SemanticsContext &semaCtx);
+Object makeObject(const parser::Designator &dsg,
+                  semantics::SemanticsContext &semaCtx);
+Object makeObject(const parser::StructureComponent &comp,
+                  semantics::SemanticsContext &semaCtx);
+
+inline auto makeObjectFn(semantics::SemanticsContext &semaCtx) {
+  return [&](auto &&s) { return makeObject(s, semaCtx); };
+}
+
+template <typename T>
+SomeExpr makeExpr(T &&pftExpr, semantics::SemanticsContext &semaCtx) {
+  auto maybeExpr = evaluate::ExpressionAnalyzer(semaCtx).Analyze(pftExpr);
+  assert(maybeExpr);
+  return std::move(*maybeExpr);
+}
+
+inline auto makeExprFn(semantics::SemanticsContext &semaCtx) {
+  return [&](auto &&s) { return makeExpr(s, semaCtx); };
+}
+
+template <
+    typename ContainerTy, typename FunctionTy,
+    typename ElemTy = typename llvm::remove_cvref_t<ContainerTy>::value_type,
+    typename ResultTy = std::invoke_result_t<FunctionTy, ElemTy>>
+List<ResultTy> makeList(ContainerTy &&container, FunctionTy &&func) {
+  List<ResultTy> v;
+  llvm::transform(container, std::back_inserter(v), func);
+  return v;
+}
+
+inline ObjectList makeList(const parser::OmpObjectList &objects,
+                           semantics::SemanticsContext &semaCtx) {
+  return makeList(objects.v, makeObjectFn(semaCtx));
+}
+
+template <typename FuncTy, typename ElemTy,
+          typename ResultTy = std::invoke_result_t<FuncTy, ElemTy>>
+std::optional<ResultTy> maybeApply(FuncTy &&func,
+                                   const std::optional<ElemTy> &inp) {
+  if (!inp)
+    return std::nullopt;
+  return std::move(func(*inp));
+}
+
+std::optional<Object>
+getBaseObject(const Object &object,
+              Fortran::semantics::SemanticsContext &semaCtx);
+
+namespace clause {
+#ifdef EMPTY_CLASS
+#undef EMPTY_CLASS
+#endif
+#define EMPTY_CLASS(cls)                                                       \
+  using cls = tomp::clause::cls##T<SymIdent, SymReference>
+
+#ifdef WRAPPER_CLASS
+#undef WRAPPER_CLASS
+#endif
+#define WRAPPER_CLASS(cls, content)                                            \
+  [[maybe_unused]] extern int xyzzy_semicolon_absorber
+#define GEN_FLANG_CLAUSE_PARSER_CLASSES
+#include "llvm/Frontend/OpenMP/OMP.inc"
+#undef EMPTY_CLASS
+#undef WRAPPER_CLASS
+
+using DefinedOperator = tomp::clause::DefinedOperatorT<SymIdent, SymReference>;
+using ProcedureDesignator =
+    tomp::clause::ProcedureDesignatorT<SymIdent, SymReference>;
+using ReductionOperator =
+    tomp::clause::ReductionOperatorT<SymIdent, SymReference>;
+
+// "Requires" clauses are handled early on, and the aggregated information
+// is stored in the Symbol details of modules, programs, and subprograms.
+// These clauses are still handled here to cover all alternatives in the
+// main clause variant.
+
+using Aligned = tomp::clause::AlignedT<SymIdent, SymReference>;
+using Allocate = tomp::clause::AllocateT<SymIdent, SymReference>;
+using Allocator = tomp::clause::AllocatorT<SymIdent, SymReference>;
+using AtomicDefaultMemOrder =
+    tomp::clause::AtomicDefaultMemOrderT<SymIdent, SymReference>;
+using Collapse = tomp::clause::CollapseT<SymIdent, SymReference>;
+using Copyin = tomp::clause::CopyinT<SymIdent, SymReference>;
+using Copyprivate = tomp::clause::CopyprivateT<SymIdent, SymReference>;
+using Defaultmap = tomp::clause::DefaultmapT<SymIdent, SymReference>;
+using Default = tomp::clause::DefaultT<SymIdent, SymReference>;
+using Depend = tomp::clause::DependT<SymIdent, SymReference>;
+using Device = tomp::clause::DeviceT<SymIdent, SymReference>;
+using DeviceType = tomp::clause::DeviceTypeT<SymIdent, SymReference>;
+using DistSchedule = tomp::clause::DistScheduleT<SymIdent, SymReference>;
+using Enter = tomp::clause::EnterT<SymIdent, SymReference>;
+using Filter = tomp::clause::FilterT<SymIdent, SymReference>;
+using Final = tomp::clause::FinalT<SymIdent, SymReference>;
+using Firstprivate = tomp::clause::FirstprivateT<SymIdent, SymReference>;
+using From = tomp::clause::FromT<SymIdent, SymReference>;
+using Grainsize = tomp::clause::GrainsizeT<SymIdent, SymReference>;
+using HasDeviceAddr = tomp::clause::HasDeviceAddrT<SymIdent, SymReference>;
+using Hint = tomp::clause::HintT<SymIdent, SymReference>;
+using If = tomp::clause::IfT<SymIdent, SymReference>;
+using InReduction = tomp::clause::InReductionT<SymIdent, SymReference>;
+using IsDevicePtr = tomp::clause::IsDevicePtrT<SymIdent, SymReference>;
+using Lastprivate = tomp::clause::LastprivateT<SymIdent, SymReference>;
+using Linear = tomp::clause::LinearT<SymIdent, SymReference>;
+using Link = tomp::clause::LinkT<SymIdent, SymReference>;
+using Map = tomp::clause::MapT<SymIdent, SymReference>;
+using Nocontext = tomp::clause::NocontextT<SymIdent, SymReference>;
+using Nontemporal = tomp::clause::NontemporalT<SymIdent, SymReference>;
+using Novariants = tomp::clause::NovariantsT<SymIdent, SymReference>;
+using NumTasks = tomp::clause::NumTasksT<SymIdent, SymReference>;
+using NumTeams = tomp::clause::NumTeamsT<SymIdent, SymReference>;
+using NumThreads = tomp::clause::NumThreadsT<SymIdent, SymReference>;
+using OmpxDynCgroupMem =
+    tomp::clause::OmpxDynCgroupMemT<SymIdent, SymReference>;
+using Ordered = tomp::clause::OrderedT<SymIdent, SymReference>;
+using Order = tomp::clause::OrderT<SymIdent, SymReference>;
+using Partial = tomp::clause::PartialT<SymIdent, SymReference>;
+using Priority = tomp::clause::PriorityT<SymIdent, SymReference>;
+using Private = tomp::clause::PrivateT<SymIdent, SymReference>;
+using ProcBind = tomp::clause::ProcBindT<SymIdent, SymReference>;
+using Reduction = tomp::clause::ReductionT<SymIdent, SymReference>;
+using Safelen = tomp::clause::SafelenT<SymIdent, SymReference>;
+using Schedule = tomp::clause::ScheduleT<SymIdent, SymReference>;
+using Shared = tomp::clause::SharedT<SymIdent, SymReference>;
+using Simdlen = tomp::clause::SimdlenT<SymIdent, SymReference>;
+using Sizes = tomp::clause::SizesT<SymIdent, SymReference>;
+using TaskReduction = tomp::clause::TaskReductionT<SymIdent, SymReference>;
+using ThreadLimit = tomp::clause::ThreadLimitT<SymIdent, SymReference>;
+using To = tomp::clause::ToT<SymIdent, SymReference>;
+using Uniform = tomp::clause::UniformT<SymIdent, SymReference>;
+using UseDeviceAddr = tomp::clause::UseDeviceAddrT<SymIdent, SymReference>;
+using UseDevicePtr = tomp::clause::UseDevicePtrT<SymIdent, SymReference>;
+} // namespace clause
+
+struct Clause : public tomp::ClauseT<SymIdent, SymReference> {
+  parser::CharBlock source;
+};
+
+template <typename Specific>
+Clause makeClause(llvm::omp::Clause id, Specific &&specific,
+                  parser::CharBlock source = {}) {
+  return Clause{{id, specific}, source};
+}
+
+Clause makeClause(const Fortran::parser::OmpClause &cls,
+                  semantics::SemanticsContext &semaCtx);
+
+List<Clause> makeList(const parser::OmpClauseList &clauses,
+                      semantics::SemanticsContext &semaCtx);
+} // namespace Fortran::lower::omp
+
+#endif // FORTRAN_LOWER_OPENMP_CLAUSES_H
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 25bb4d9..5d4db06 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -574,8 +574,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
 
   ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel,
-               ifClauseOperand);
+  cp.processIf(clause::If::DirectiveNameModifier::Parallel, ifClauseOperand);
   cp.processNumThreads(stmtCtx, numThreadsClauseOperand);
   cp.processProcBind(procBindKindAttr);
   cp.processDefault();
@@ -751,8 +750,7 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
       dependOperands;
 
   ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task,
-               ifClauseOperand);
+  cp.processIf(clause::If::DirectiveNameModifier::Task, ifClauseOperand);
   cp.processAllocate(allocatorOperands, allocateOperands);
   cp.processDefault();
   cp.processFinal(stmtCtx, finalClauseOperand);
@@ -865,8 +863,7 @@ genDataOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<const Fortran::semantics::Symbol *> useDeviceSymbols;
 
   ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData,
-               ifClauseOperand);
+  cp.processIf(clause::If::DirectiveNameModifier::TargetData, ifClauseOperand);
   cp.processDevice(stmtCtx, deviceOperand);
   cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs,
                          useDeviceSymbols);
@@ -911,20 +908,17 @@ genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<mlir::Value> mapOperands, dependOperands;
   llvm::SmallVector<mlir::Attribute> dependTypeOperands;
 
-  Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName;
+  clause::If::DirectiveNameModifier directiveName;
   // GCC 9.3.0 emits a (probably) bogus warning about an unused variable.
   [[maybe_unused]] llvm::omp::Directive directive;
   if constexpr (std::is_same_v<OpTy, mlir::omp::EnterDataOp>) {
-    directiveName =
-        Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData;
+    directiveName = clause::If::DirectiveNameModifier::TargetEnterData;
     directive = llvm::omp::Directive::OMPD_target_enter_data;
   } else if constexpr (std::is_same_v<OpTy, mlir::omp::ExitDataOp>) {
-    directiveName =
-        Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData;
+    directiveName = clause::If::DirectiveNameModifier::TargetExitData;
     directive = llvm::omp::Directive::OMPD_target_exit_data;
   } else if constexpr (std::is_same_v<OpTy, mlir::omp::UpdateDataOp>) {
-    directiveName =
-        Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate;
+    directiveName = clause::If::DirectiveNameModifier::TargetUpdate;
     directive = llvm::omp::Directive::OMPD_target_update;
   } else {
     return nullptr;
@@ -1126,8 +1120,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<const Fortran::semantics::Symbol *> mapSymbols;
 
   ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target,
-               ifClauseOperand);
+  cp.processIf(clause::If::DirectiveNameModifier::Target, ifClauseOperand);
   cp.processDevice(stmtCtx, deviceOperand);
   cp.processThreadLimit(stmtCtx, threadLimitOperand);
   cp.processDepend(dependTypeOperands, dependOperands);
@@ -1258,8 +1251,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
 
   ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams,
-               ifClauseOperand);
+  cp.processIf(clause::If::DirectiveNameModifier::Teams, ifClauseOperand);
   cp.processAllocate(allocatorOperands, allocateOperands);
   cp.processDefault();
   cp.processNumTeams(stmtCtx, numTeamsClauseOperand);
@@ -1298,8 +1290,9 @@ static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo(
 
   if (const auto *objectList{
           Fortran::parser::Unwrap<Fortran::parser::OmpObjectList>(spec.u)}) {
+    ObjectList objects{makeList(*objectList, semaCtx)};
     // Case: declare target(func, var1, var2)
-    gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to,
+    gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
                          symbolAndClause);
   } else if (const auto *clauseList{
                  Fortran::parser::Unwrap<Fortran::parser::OmpClauseList>(
@@ -1438,7 +1431,7 @@ genOmpFlush(Fortran::lower::AbstractConverter &converter,
   if (const auto &ompObjectList =
           std::get<std::optional<Fortran::parser::OmpObjectList>>(
               flushConstruct.t))
-    genObjectList(*ompObjectList, converter, operandRange);
+    genObjectList2(*ompObjectList, converter, operandRange);
   const auto &memOrderClause =
       std::get<std::optional<std::list<Fortran::parser::OmpMemoryOrderClause>>>(
           flushConstruct.t);
@@ -1600,8 +1593,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
                      loopVarTypeSize);
   cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand);
   cp.processReduction(loc, reductionVars, reductionDeclSymbols);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd,
-               ifClauseOperand);
+  cp.processIf(clause::If::DirectiveNameModifier::Simd, ifClauseOperand);
   cp.processSimdlen(simdlenClauseOperand);
   cp.processSafelen(safelenClauseOperand);
   cp.processTODO<Fortran::parser::OmpClause::Aligned,
@@ -2419,106 +2411,100 @@ void Fortran::lower::genOpenMPReduction(
     const Fortran::parser::OmpClauseList &clauseList) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  for (const Fortran::parser::OmpClause &clause : clauseList.v) {
+  List<Clause> clauses{makeList(clauseList, semaCtx)};
+
+  for (const Clause &clause : clauses) {
     if (const auto &reductionClause =
-            std::get_if<Fortran::parser::OmpClause::Reduction>(&clause.u)) {
-      const auto &redOperator{std::get<Fortran::parser::OmpReductionOperator>(
-          reductionClause->v.t)};
-      const auto &objectList{
-          std::get<Fortran::parser::OmpObjectList>(reductionClause->v.t)};
+            std::get_if<clause::Reduction>(&clause.u)) {
+      const auto &redOperator{
+          std::get<clause::ReductionOperator>(reductionClause->t)};
+      const auto &objects{std::get<ObjectList>(reductionClause->t)};
       if (const auto *reductionOp =
-              std::get_if<Fortran::parser::DefinedOperator>(&redOperator.u)) {
+              std::get_if<clause::DefinedOperator>(&redOperator.u)) {
         const auto &intrinsicOp{
-            std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
+            std::get<clause::DefinedOperator::IntrinsicOperator>(
                 reductionOp->u)};
 
         switch (intrinsicOp) {
-        case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
-        case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
-        case Fortran::parser::DefinedOperator::IntrinsicOperator::AND:
-        case Fortran::parser::DefinedOperator::IntrinsicOperator::EQV:
-        case Fortran::parser::DefinedOperator::IntrinsicOperator::OR:
-        case Fortran::parser::DefinedOperator::IntrinsicOperator::NEQV:
+        case clause::DefinedOperator::IntrinsicOperator::Add:
+        case clause::DefinedOperator::IntrinsicOperator::Multiply:
+        case clause::DefinedOperator::IntrinsicOperator::AND:
+        case clause::DefinedOperator::IntrinsicOperator::EQV:
+        case clause::DefinedOperator::IntrinsicOperator::OR:
+        case clause::DefinedOperator::IntrinsicOperator::NEQV:
           break;
         default:
           continue;
         }
-        for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
-          if (const auto *name{
-                  Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
-            if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-              mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
-              if (auto declOp = reductionVal.getDefiningOp<hlfir::DeclareOp>())
-                reductionVal = declOp.getBase();
-              mlir::Type reductionType =
-                  reductionVal.getType().cast<fir::ReferenceType>().getEleTy();
-              if (!reductionType.isa<fir::LogicalType>()) {
-                if (!reductionType.isIntOrIndexOrFloat())
-                  continue;
-              }
-              for (mlir::OpOperand &reductionValUse : reductionVal.getUses()) {
-                if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(
-                        reductionValUse.getOwner())) {
-                  mlir::Value loadVal = loadOp.getRes();
-                  if (reductionType.isa<fir::LogicalType>()) {
-                    mlir::Operation *reductionOp = findReductionChain(loadVal);
-                    fir::ConvertOp convertOp =
-                        getConvertFromReductionOp(reductionOp, loadVal);
-                    updateReduction(reductionOp, firOpBuilder, loadVal,
-                                    reductionVal, &convertOp);
-                    removeStoreOp(reductionOp, reductionVal);
-                  } else if (mlir::Operation *reductionOp =
-                                 findReductionChain(loadVal, &reductionVal)) {
-                    updateReduction(reductionOp, firOpBuilder, loadVal,
-                                    reductionVal);
-                  }
+        for (const Object &object : objects) {
+          if (const Fortran::semantics::Symbol *symbol = object.id()) {
+            mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
+            if (auto declOp = reductionVal.getDefiningOp<hlfir::DeclareOp>())
+              reductionVal = declOp.getBase();
+            mlir::Type reductionType =
+                reductionVal.getType().cast<fir::ReferenceType>().getEleTy();
+            if (!reductionType.isa<fir::LogicalType>()) {
+              if (!reductionType.isIntOrIndexOrFloat())
+                continue;
+            }
+            for (mlir::OpOperand &reductionValUse : reductionVal.getUses()) {
+              if (auto loadOp =
+                      mlir::dyn_cast<fir::LoadOp>(reductionValUse.getOwner())) {
+                mlir::Value loadVal = loadOp.getRes();
+                if (reductionType.isa<fir::LogicalType>()) {
+                  mlir::Operation *reductionOp = findReductionChain(loadVal);
+                  fir::ConvertOp convertOp =
+                      getConvertFromReductionOp(reductionOp, loadVal);
+                  updateReduction(reductionOp, firOpBuilder, loadVal,
+                                  reductionVal, &convertOp);
+                  removeStoreOp(reductionOp, reductionVal);
+                } else if (mlir::Operation *reductionOp =
+                               findReductionChain(loadVal, &reductionVal)) {
+                  updateReduction(reductionOp, firOpBuilder, loadVal,
+                                  reductionVal);
                 }
               }
             }
           }
         }
       } else if (const auto *reductionIntrinsic =
-                     std::get_if<Fortran::parser::ProcedureDesignator>(
-                         &redOperator.u)) {
+                     std::get_if<clause::ProcedureDesignator>(&redOperator.u)) {
         if (!ReductionProcessor::supportedIntrinsicProcReduction(
                 *reductionIntrinsic))
           continue;
         ReductionProcessor::ReductionIdentifier redId =
             ReductionProcessor::getReductionType(*reductionIntrinsic);
-        for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
-          if (const auto *name{
-                  Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
-            if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-              mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
-              if (auto declOp = reductionVal.getDefiningOp<hlfir::DeclareOp>())
-                reductionVal = declOp.getBase();
-              for (const mlir::OpOperand &reductionValUse :
-                   reductionVal.getUses()) {
-                if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(
-                        reductionValUse.getOwner())) {
-                  mlir::Value loadVal = loadOp.getRes();
-                  // Max is lowered as a compare -> select.
-                  // Match the pattern here.
-                  mlir::Operation *reductionOp =
-                      findReductionChain(loadVal, &reductionVal);
-                  if (reductionOp == nullptr)
-                    continue;
-
-                  if (redId == ReductionProcessor::ReductionIdentifier::MAX ||
-                      redId == ReductionProcessor::ReductionIdentifier::MIN) {
-                    assert(mlir::isa<mlir::arith::SelectOp>(reductionOp) &&
-                           "Selection Op not found in reduction intrinsic");
-                    mlir::Operation *compareOp =
-                        getCompareFromReductionOp(reductionOp, loadVal);
-                    updateReduction(compareOp, firOpBuilder, loadVal,
-                                    reductionVal);
-                  }
-                  if (redId == ReductionProcessor::ReductionIdentifier::IOR ||
-                      redId == ReductionProcessor::ReductionIdentifier::IEOR ||
-                      redId == ReductionProcessor::ReductionIdentifier::IAND) {
-                    updateReduction(reductionOp, firOpBuilder, loadVal,
-                                    reductionVal);
-                  }
+        for (const Object &object : objects) {
+          if (const Fortran::semantics::Symbol *symbol = object.id()) {
+            mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
+            if (auto declOp = reductionVal.getDefiningOp<hlfir::DeclareOp>())
+              reductionVal = declOp.getBase();
+            for (const mlir::OpOperand &reductionValUse :
+                 reductionVal.getUses()) {
+              if (auto loadOp =
+                      mlir::dyn_cast<fir::LoadOp>(reductionValUse.getOwner())) {
+                mlir::Value loadVal = loadOp.getRes();
+                // Max is lowered as a compare -> select.
+                // Match the pattern here.
+                mlir::Operation *reductionOp =
+                    findReductionChain(loadVal, &reductionVal);
+                if (reductionOp == nullptr)
+                  continue;
+
+                if (redId == ReductionProcessor::ReductionIdentifier::MAX ||
+                    redId == ReductionProcessor::ReductionIdentifier::MIN) {
+                  assert(mlir::isa<mlir::arith::SelectOp>(reductionOp) &&
+                         "Selection Op not found in reduction intrinsic");
+                  mlir::Operation *compareOp =
+                      getCompareFromReductionOp(reductionOp, loadVal);
+                  updateReduction(compareOp, firOpBuilder, loadVal,
+                                  reductionVal);
+                }
+                if (redId == ReductionProcessor::ReductionIdentifier::IOR ||
+                    redId == ReductionProcessor::ReductionIdentifier::IEOR ||
+                    redId == ReductionProcessor::ReductionIdentifier::IAND) {
+                  updateReduction(reductionOp, firOpBuilder, loadVal,
+                                  reductionVal);
                 }
               }
             }
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index e6a63dd..6dc467c 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -30,9 +30,9 @@ namespace lower {
 namespace omp {
 
 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
-    const Fortran::parser::ProcedureDesignator &pd) {
+    const omp::clause::ProcedureDesignator &pd) {
   auto redType = llvm::StringSwitch<std::optional<ReductionIdentifier>>(
-                     ReductionProcessor::getRealName(pd).ToString())
+                     getRealName(pd.v.id()).ToString())
                      .Case("max", ReductionIdentifier::MAX)
                      .Case("min", ReductionIdentifier::MIN)
                      .Case("iand", ReductionIdentifier::IAND)
@@ -44,21 +44,21 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
 }
 
 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
-    Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp) {
+    omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp) {
   switch (intrinsicOp) {
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+  case omp::clause::DefinedOperator::IntrinsicOperator::Add:
     return ReductionIdentifier::ADD;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::Subtract:
+  case omp::clause::DefinedOperator::IntrinsicOperator::Subtract:
     return ReductionIdentifier::SUBTRACT;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+  case omp::clause::DefinedOperator::IntrinsicOperator::Multiply:
     return ReductionIdentifier::MULTIPLY;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::AND:
+  case omp::clause::DefinedOperator::IntrinsicOperator::AND:
     return ReductionIdentifier::AND;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::EQV:
+  case omp::clause::DefinedOperator::IntrinsicOperator::EQV:
     return ReductionIdentifier::EQV;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::OR:
+  case omp::clause::DefinedOperator::IntrinsicOperator::OR:
     return ReductionIdentifier::OR;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::NEQV:
+  case omp::clause::DefinedOperator::IntrinsicOperator::NEQV:
     return ReductionIdentifier::NEQV;
   default:
     llvm_unreachable("unexpected intrinsic operator in reduction");
@@ -66,13 +66,11 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
 }
 
 bool ReductionProcessor::supportedIntrinsicProcReduction(
-    const Fortran::parser::ProcedureDesignator &pd) {
-  const auto *name{Fortran::parser::Unwrap<Fortran::parser::Name>(pd)};
-  assert(name && "Invalid Reduction Intrinsic.");
-  if (!name->symbol->GetUltimate().attrs().test(
-          Fortran::semantics::Attr::INTRINSIC))
+    const omp::clause::ProcedureDesignator &pd) {
+  Fortran::semantics::Symbol *sym = pd.v.id();
+  if (!sym->GetUltimate().attrs().test(Fortran::semantics::Attr::INTRINSIC))
     return false;
-  auto redType = llvm::StringSwitch<bool>(getRealName(name).ToString())
+  auto redType = llvm::StringSwitch<bool>(getRealName(sym).ToString())
                      .Case("max", true)
                      .Case("min", true)
                      .Case("iand", true)
@@ -99,24 +97,24 @@ std::string ReductionProcessor::getReductionName(llvm::StringRef name,
 }
 
 std::string ReductionProcessor::getReductionName(
-    Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
-    mlir::Type ty, bool isByRef) {
+    omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp, mlir::Type ty,
+    bool isByRef) {
   std::string reductionName;
 
   switch (intrinsicOp) {
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+  case omp::clause::DefinedOperator::IntrinsicOperator::Add:
     reductionName = "add_reduction";
     break;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+  case omp::clause::DefinedOperator::IntrinsicOperator::Multiply:
     reductionName = "multiply_reduction";
     break;
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::AND:
+  case omp::clause::DefinedOperator::IntrinsicOperator::AND:
     return "and_reduction";
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::EQV:
+  case omp::clause::DefinedOperator::IntrinsicOperator::EQV:
     return "eqv_reduction";
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::OR:
+  case omp::clause::DefinedOperator::IntrinsicOperator::OR:
     return "or_reduction";
-  case Fortran::parser::DefinedOperator::IntrinsicOperator::NEQV:
+  case omp::clause::DefinedOperator::IntrinsicOperator::NEQV:
     return "neqv_reduction";
   default:
     reductionName = "other_reduction";
@@ -364,7 +362,7 @@ bool ReductionProcessor::doReductionByRef(
 void ReductionProcessor::addReductionDecl(
     mlir::Location currentLocation,
     Fortran::lower::AbstractConverter &converter,
-    const Fortran::parser::OmpReductionClause &reduction,
+    const omp::clause::Reduction &reduction,
     llvm::SmallVectorImpl<mlir::Value> &reductionVars,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
     llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
@@ -372,13 +370,12 @@ void ReductionProcessor::addReductionDecl(
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::omp::ReductionDeclareOp decl;
   const auto &redOperator{
-      std::get<Fortran::parser::OmpReductionOperator>(reduction.t)};
-  const auto &objectList{std::get<Fortran::parser::OmpObjectList>(reduction.t)};
+      std::get<omp::clause::ReductionOperator>(reduction.t)};
+  const auto &objectList{std::get<omp::ObjectList>(reduction.t)};
 
-  if (!std::holds_alternative<Fortran::parser::DefinedOperator>(
-          redOperator.u)) {
+  if (!std::holds_alternative<omp::clause::DefinedOperator>(redOperator.u)) {
     if (const auto *reductionIntrinsic =
-            std::get_if<Fortran::parser::ProcedureDesignator>(&redOperator.u)) {
+            std::get_if<omp::clause::ProcedureDesignator>(&redOperator.u)) {
       if (!ReductionProcessor::supportedIntrinsicProcReduction(
               *reductionIntrinsic)) {
         return;
@@ -388,27 +385,23 @@ void ReductionProcessor::addReductionDecl(
     }
   }
 
-  // initial pass to collect all recuction vars so we can figure out if this
+  // initial pass to collect all reduction vars so we can figure out if this
   // should happen byref
-  for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
-    if (const auto *name{
-            Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
-      if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-        if (reductionSymbols)
-          reductionSymbols->push_back(symbol);
-        mlir::Value symVal = converter.getSymbolAddress(*symbol);
-        if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
-          symVal = declOp.getBase();
-        reductionVars.push_back(symVal);
-      }
-    }
+  for (const Object &object : objectList) {
+    const Fortran::semantics::Symbol *symbol = object.id();
+    if (reductionSymbols)
+      reductionSymbols->push_back(symbol);
+    mlir::Value symVal = converter.getSymbolAddress(*symbol);
+    if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+      symVal = declOp.getBase();
+    reductionVars.push_back(symVal);
   }
   const bool isByRef = doReductionByRef(reductionVars);
 
   if (const auto &redDefinedOp =
-          std::get_if<Fortran::parser::DefinedOperator>(&redOperator.u)) {
+          std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) {
     const auto &intrinsicOp{
-        std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
+        std::get<omp::clause::DefinedOperator::IntrinsicOperator>(
             redDefinedOp->u)};
     ReductionIdentifier redId = getReductionType(intrinsicOp);
     switch (redId) {
@@ -424,73 +417,63 @@ void ReductionProcessor::addReductionDecl(
            "Reduction of some intrinsic operators is not supported");
       break;
     }
-    for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
-      if (const auto *name{
-              Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
-        if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-          mlir::Value symVal = converter.getSymbolAddress(*symbol);
-          if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
-            symVal = declOp.getBase();
-          auto redType = symVal.getType().cast<fir::ReferenceType>();
-          if (redType.getEleTy().isa<fir::LogicalType>())
-            decl = createReductionDecl(
-                firOpBuilder,
-                getReductionName(intrinsicOp, firOpBuilder.getI1Type(),
-                                 isByRef),
-                redId, redType, currentLocation, isByRef);
-          else if (redType.getEleTy().isIntOrIndexOrFloat()) {
-            decl = createReductionDecl(
-                firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
-                redId, redType, currentLocation, isByRef);
-          } else {
-            TODO(currentLocation, "Reduction of some types is not supported");
-          }
-          reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
-              firOpBuilder.getContext(), decl.getSymName()));
-        }
+
+    for (const Object &object : objectList) {
+      const Fortran::semantics::Symbol *symbol = object.id();
+      mlir::Value symVal = converter.getSymbolAddress(*symbol);
+      if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+        symVal = declOp.getBase();
+      auto redType = symVal.getType().cast<fir::ReferenceType>();
+      if (redType.getEleTy().isa<fir::LogicalType>())
+        decl = createReductionDecl(
+            firOpBuilder,
+            getReductionName(intrinsicOp, firOpBuilder.getI1Type(), isByRef),
+            redId, redType, currentLocation, isByRef);
+      else if (redType.getEleTy().isIntOrIndexOrFloat()) {
+        decl = createReductionDecl(
+            firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
+            redId, redType, currentLocation, isByRef);
+      } else {
+        TODO(currentLocation, "Reduction of some types is not supported");
       }
+      reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
+          firOpBuilder.getContext(), decl.getSymName()));
     }
   } else if (const auto *reductionIntrinsic =
-                 std::get_if<Fortran::parser::ProcedureDesignator>(
+                 std::get_if<omp::clause::ProcedureDesignator>(
                      &redOperator.u)) {
     if (ReductionProcessor::supportedIntrinsicProcReduction(
             *reductionIntrinsic)) {
       ReductionProcessor::ReductionIdentifier redId =
           ReductionProcessor::getReductionType(*reductionIntrinsic);
-      for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
-        if (const auto *name{
-                Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
-          if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-            mlir::Value symVal = converter.getSymbolAddress(*symbol);
-            if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
-              symVal = declOp.getBase();
-            auto redType = symVal.getType().cast<fir::ReferenceType>();
-            assert(redType.getEleTy().isIntOrIndexOrFloat() &&
-                   "Unsupported reduction type");
-            decl = createReductionDecl(
-                firOpBuilder,
-                getReductionName(getRealName(*reductionIntrinsic).ToString(),
-                                 redType, isByRef),
-                redId, redType, currentLocation, isByRef);
-            reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
-                firOpBuilder.getContext(), decl.getSymName()));
-          }
-        }
+      for (const Object &object : objectList) {
+        const Fortran::semantics::Symbol *symbol = object.id();
+        mlir::Value symVal = converter.getSymbolAddress(*symbol);
+        if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+          symVal = declOp.getBase();
+        auto redType = symVal.getType().cast<fir::ReferenceType>();
+        assert(redType.getEleTy().isIntOrIndexOrFloat() &&
+               "Unsupported reduction type");
+        decl = createReductionDecl(
+            firOpBuilder,
+            getReductionName(getRealName(*reductionIntrinsic).ToString(),
+                             redType, isByRef),
+            redId, redType, currentLocation, isByRef);
+        reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
+            firOpBuilder.getContext(), decl.getSymName()));
       }
     }
   }
 }
 
 const Fortran::semantics::SourceName
-ReductionProcessor::getRealName(const Fortran::parser::Name *name) {
-  return name->symbol->GetUltimate().name();
+ReductionProcessor::getRealName(const Fortran::semantics::Symbol *symbol) {
+  return symbol->GetUltimate().name();
 }
 
-const Fortran::semantics::SourceName ReductionProcessor::getRealName(
-    const Fortran::parser::ProcedureDesignator &pd) {
-  const auto *name{Fortran::parser::Unwrap<Fortran::parser::Name>(pd)};
-  assert(name && "Invalid Reduction Intrinsic.");
-  return getRealName(name);
+const Fortran::semantics::SourceName
+ReductionProcessor::getRealName(const omp::clause::ProcedureDesignator &pd) {
+  return getRealName(pd.v.id());
 }
 
 int ReductionProcessor::getOperationIdentity(ReductionIdentifier redId,
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index 679580f..ef63394 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -13,6 +13,7 @@
 #ifndef FORTRAN_LOWER_REDUCTIONPROCESSOR_H
 #define FORTRAN_LOWER_REDUCTIONPROCESSOR_H
 
+#include "Clauses.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Parser/parse-tree.h"
@@ -58,19 +59,19 @@ public:
   };
 
   static ReductionIdentifier
-  getReductionType(const Fortran::parser::ProcedureDesignator &pd);
+  getReductionType(const omp::clause::ProcedureDesignator &pd);
 
-  static ReductionIdentifier getReductionType(
-      Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp);
+  static ReductionIdentifier
+  getReductionType(omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp);
 
-  static bool supportedIntrinsicProcReduction(
-      const Fortran::parser::ProcedureDesignator &pd);
+  static bool
+  supportedIntrinsicProcReduction(const omp::clause::ProcedureDesignator &pd);
 
   static const Fortran::semantics::SourceName
-  getRealName(const Fortran::parser::Name *name);
+  getRealName(const Fortran::semantics::Symbol *symbol);
 
   static const Fortran::semantics::SourceName
-  getRealName(const Fortran::parser::ProcedureDesignator &pd);
+  getRealName(const omp::clause::ProcedureDesignator &pd);
 
   static bool
   doReductionByRef(const llvm::SmallVectorImpl<mlir::Value> &reductionVars);
@@ -78,9 +79,9 @@ public:
   static std::string getReductionName(llvm::StringRef name, mlir::Type ty,
                                       bool isByRef);
 
-  static std::string getReductionName(
-      Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
-      mlir::Type ty, bool isByRef);
+  static std::string
+  getReductionName(omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp,
+                   mlir::Type ty, bool isByRef);
 
   /// This function returns the identity value of the operator \p
   /// reductionOpName. For example:
@@ -119,7 +120,7 @@ public:
   static void
   addReductionDecl(mlir::Location currentLocation,
                    Fortran::lower::AbstractConverter &converter,
-                   const Fortran::parser::OmpReductionClause &reduction,
+                   const omp::clause::Reduction &reduction,
                    llvm::SmallVectorImpl<mlir::Value> &reductionVars,
                    llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
                    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 49517f6..fa4a51e 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Utils.h"
+#include "Clauses.h"
 
 #include <flang/Lower/AbstractConverter.h>
 #include <flang/Lower/ConvertType.h>
@@ -34,19 +35,33 @@ namespace Fortran {
 namespace lower {
 namespace omp {
 
-void genObjectList(const Fortran::parser::OmpObjectList &objectList,
+void genObjectList(const ObjectList &objects,
                    Fortran::lower::AbstractConverter &converter,
                    llvm::SmallVectorImpl<mlir::Value> &operands) {
+  for (const Object &object : objects) {
+    const Fortran::semantics::Symbol *sym = object.id();
+    assert(sym && "Expected Symbol");
+    if (mlir::Value variable = converter.getSymbolAddress(*sym)) {
+      operands.push_back(variable);
+    } else if (const auto *details =
+                   sym->detailsIf<Fortran::semantics::HostAssocDetails>()) {
+      operands.push_back(converter.getSymbolAddress(details->symbol()));
+      converter.copySymbolBinding(details->symbol(), *sym);
+    }
+  }
+}
+
+void genObjectList2(const Fortran::parser::OmpObjectList &objectList,
+                    Fortran::lower::AbstractConverter &converter,
+                    llvm::SmallVectorImpl<mlir::Value> &operands) {
   auto addOperands = [&](Fortran::lower::SymbolRef sym) {
     const mlir::Value variable = converter.getSymbolAddress(sym);
     if (variable) {
       operands.push_back(variable);
-    } else {
-      if (const auto *details =
-              sym->detailsIf<Fortran::semantics::HostAssocDetails>()) {
-        operands.push_back(converter.getSymbolAddress(details->symbol()));
-        converter.copySymbolBinding(details->symbol(), sym);
-      }
+    } else if (const auto *details =
+                   sym->detailsIf<Fortran::semantics::HostAssocDetails>()) {
+      operands.push_back(converter.getSymbolAddress(details->symbol()));
+      converter.copySymbolBinding(details->symbol(), sym);
     }
   };
   for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
@@ -56,24 +71,10 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList,
 }
 
 void gatherFuncAndVarSyms(
-    const Fortran::parser::OmpObjectList &objList,
-    mlir::omp::DeclareTargetCaptureClause clause,
+    const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
-  for (const Fortran::parser::OmpObject &ompObject : objList.v) {
-    Fortran::common::visit(
-        Fortran::common::visitors{
-            [&](const Fortran::parser::Designator &designator) {
-              if (const Fortran::parser::Name *name =
-                      Fortran::semantics::getDesignatorNameIfDataRef(
-                          designator)) {
-                symbolAndClause.emplace_back(clause, *name->symbol);
-              }
-            },
-            [&](const Fortran::parser::Name &name) {
-              symbolAndClause.emplace_back(clause, *name.symbol);
-            }},
-        ompObject.u);
-  }
+  for (const Object &object : objects)
+    symbolAndClause.emplace_back(clause, *object.id());
 }
 
 Fortran::semantics::Symbol *
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 76a15e8..176ab2b 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -9,6 +9,7 @@
 #ifndef FORTRAN_LOWER_OPENMPUTILS_H
 #define FORTRAN_LOWER_OPENMPUTILS_H
 
+#include "Clauses.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/Value.h"
@@ -51,17 +52,20 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
                 bool isVal = false);
 
 void gatherFuncAndVarSyms(
-    const Fortran::parser::OmpObjectList &objList,
-    mlir::omp::DeclareTargetCaptureClause clause,
+    const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);
 
 Fortran::semantics::Symbol *
 getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject);
 
-void genObjectList(const Fortran::parser::OmpObjectList &objectList,
+void genObjectList(const ObjectList &objects,
                    Fortran::lower::AbstractConverter &converter,
                    llvm::SmallVectorImpl<mlir::Value> &operands);
 
+void genObjectList2(const Fortran::parser::OmpObjectList &objectList,
+                    Fortran::lower::AbstractConverter &converter,
+                    llvm::SmallVectorImpl<mlir::Value> &operands);
+
 } // namespace omp
 } // namespace lower
 } // namespace Fortran
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 12da741..f7327a2 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -208,6 +208,8 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
               .getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>()) {
     return ompOutlineableIface.getAllocaBlock();
   }
+  if (mlir::isa<mlir::omp::ReductionDeclareOp>(getRegion().getParentOp()))
+    return &getRegion().front();
   if (auto accRecipeIface =
           getRegion().getParentOfType<mlir::acc::RecipeInterface>()) {
     return accRecipeIface.getAllocaBlock(getRegion());
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 94fcfa3..eb8f513 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -177,6 +177,8 @@ static constexpr IntrinsicHandler handlers[]{
      /*isElemental=*/false},
     {"c_funloc", &I::genCFunLoc, {{{"x", asBox}}}, /*isElemental=*/false},
     {"c_loc", &I::genCLoc, {{{"x", asBox}}}, /*isElemental=*/false},
+    {"c_ptr_eq", &I::genCPtrCompare<mlir::arith::CmpIPredicate::eq>},
+    {"c_ptr_ne", &I::genCPtrCompare<mlir::arith::CmpIPredicate::ne>},
     {"ceiling", &I::genCeiling},
     {"char", &I::genChar},
     {"cmplx",
@@ -2797,6 +2799,23 @@ IntrinsicLibrary::genCLoc(mlir::Type resultType,
   return genCLocOrCFunLoc(builder, loc, resultType, args);
 }
 
+// C_PTR_EQ and C_PTR_NE
+template <mlir::arith::CmpIPredicate pred>
+fir::ExtendedValue
+IntrinsicLibrary::genCPtrCompare(mlir::Type resultType,
+                                 llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  mlir::Value cPtr1 = fir::getBase(args[0]);
+  mlir::Value cPtrVal1 =
+      fir::factory::genCPtrOrCFunptrValue(builder, loc, cPtr1);
+  mlir::Value cPtr2 = fir::getBase(args[1]);
+  mlir::Value cPtrVal2 =
+      fir::factory::genCPtrOrCFunptrValue(builder, loc, cPtr2);
+  mlir::Value cmp =
+      builder.create<mlir::arith::CmpIOp>(loc, pred, cPtrVal1, cPtrVal2);
+  return builder.createConvert(loc, resultType, cmp);
+}
+
 // CEILING
 mlir::Value IntrinsicLibrary::genCeiling(mlir::Type resultType,
                                          llvm::ArrayRef<mlir::Value> args) {
@@ -5240,6 +5259,13 @@ mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
                                                  remainder);
   }
 
+  // F128 arith::RemFOp may be lowered to a runtime call that may be unsupported
+  // on the target, so generate a call to Fortran Runtime's ModuloReal16.
+  if (resultType == mlir::FloatType::getF128(builder.getContext()))
+    return builder.createConvert(
+        loc, resultType,
+        fir::runtime::genModulo(builder, loc, args[0], args[1]));
+
   auto remainder = builder.create<mlir::arith::RemFOp>(loc, args[0], args[1]);
   mlir::Value zero = builder.createRealZeroConstant(loc, remainder.getType());
   auto remainderIsNotZero = builder.create<mlir::arith::CmpFOp>(
diff --git a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
index b958a30..4dcbd13 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
@@ -118,6 +118,20 @@ struct ForcedMod16 {
   }
 };
 
+/// Placeholder for real*16 version of Modulo Intrinsic
+struct ForcedModulo16 {
+  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ModuloReal16));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto fltTy = mlir::FloatType::getF128(ctx);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      return mlir::FunctionType::get(ctx, {fltTy, fltTy, strTy, intTy},
+                                     {fltTy});
+    };
+  }
+};
+
 /// Placeholder for real*10 version of Nearest Intrinsic
 struct ForcedNearest10 {
   static constexpr const char *name = ExpandAndQuoteKey(RTNAME(Nearest10));
@@ -323,6 +337,33 @@ mlir::Value fir::runtime::genMod(fir::FirOpBuilder &builder, mlir::Location loc,
   return builder.create<fir::CallOp>(loc, func, args).getResult(0);
 }
 
+/// Generate call to Modulo intrinsic runtime routine.
+mlir::Value fir::runtime::genModulo(fir::FirOpBuilder &builder,
+                                    mlir::Location loc, mlir::Value a,
+                                    mlir::Value p) {
+  mlir::func::FuncOp func;
+  mlir::Type fltTy = a.getType();
+
+  if (fltTy != p.getType())
+    fir::emitFatalError(loc, "arguments type mismatch in MOD");
+
+  // MODULO is lowered into math operations in intrinsics lowering,
+  // so genModulo() should only be used for F128 data type now.
+  if (fltTy.isF128())
+    func = fir::runtime::getRuntimeFunc<ForcedModulo16>(loc, builder);
+  else
+    fir::intrinsicTypeTODO(builder, fltTy, loc, "MODULO");
+
+  auto funcTy = func.getFunctionType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, funcTy.getInput(3));
+  auto args = fir::runtime::createArguments(builder, loc, funcTy, a, p,
+                                            sourceFile, sourceLine);
+
+  return builder.create<fir::CallOp>(loc, func, args).getResult(0);
+}
+
 /// Generate call to Nearest intrinsic runtime routine.
 mlir::Value fir::runtime::genNearest(fir::FirOpBuilder &builder,
                                      mlir::Location loc, mlir::Value x,
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index f81a083..123eb6e 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -410,8 +410,15 @@ protected:
       mlir::ConversionPatternRewriter &rewriter) const {
     auto thisPt = rewriter.saveInsertionPoint();
     mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
-    mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
-    rewriter.setInsertionPointToStart(insertBlock);
+    if (mlir::isa<mlir::omp::ReductionDeclareOp>(parentOp)) {
+      // ReductionDeclareOp has multiple child regions. We want to get the first
+      // block of whichever of those regions we are currently in
+      mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
+      rewriter.setInsertionPointToStart(&parentRegion->front());
+    } else {
+      mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
+      rewriter.setInsertionPointToStart(insertBlock);
+    }
     auto size = genI32Constant(loc, rewriter, 1);
     unsigned allocaAs = getAllocaAddressSpace(rewriter);
     unsigned programAs = getProgramAddressSpace(rewriter);
diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
index 0170b56..9d48a2f 100644
--- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
@@ -298,8 +298,7 @@ public:
                                        .isa<fir::SequenceType>());
     });
     mlir::RewritePatternSet patterns(&context);
-    patterns.insert<EmboxConversion, ArrayCoorConversion, ReboxConversion,
-                    DeclareOpConversion>(&context);
+    fir::populatePreCGRewritePatterns(patterns);
     if (mlir::failed(
             mlir::applyPartialConversion(op, target, std::move(patterns)))) {
       mlir::emitError(mlir::UnknownLoc::get(&context),
@@ -327,3 +326,8 @@ public:
 std::unique_ptr<mlir::Pass> fir::createFirCodeGenRewritePass() {
   return std::make_unique<CodeGenRewrite>();
 }
+
+void fir::populatePreCGRewritePatterns(mlir::RewritePatternSet &patterns) {
+  patterns.insert<EmboxConversion, ArrayCoorConversion, ReboxConversion,
+                  DeclareOpConversion>(patterns.getContext());
+}
diff --git a/flang/lib/Parser/executable-parsers.cpp b/flang/lib/Parser/executable-parsers.cpp
index de2be01..07a570b 100644
--- a/flang/lib/Parser/executable-parsers.cpp
+++ b/flang/lib/Parser/executable-parsers.cpp
@@ -542,19 +542,19 @@ TYPE_CONTEXT_PARSER("UNLOCK statement"_en_US,
 // CUF-kernel-do-directive ->
 //     !$CUF KERNEL DO [ (scalar-int-constant-expr) ] <<< grid, block [, stream]
 //     >>> do-construct
-// grid -> * | scalar-int-expr | ( scalar-int-expr-list )
-// block -> * | scalar-int-expr | ( scalar-int-expr-list )
+// star-or-expr -> * | scalar-int-expr
+// grid -> * | scalar-int-expr | ( star-or-expr-list )
+// block -> * | scalar-int-expr | ( star-or-expr-list )
 // stream -> ( 0, | STREAM = ) scalar-int-expr
+constexpr auto starOrExpr{construct<CUFKernelDoConstruct::StarOrExpr>(
+    "*" >> pure<std::optional<ScalarIntExpr>>() ||
+    applyFunction(presentOptional<ScalarIntExpr>, scalarIntExpr))};
+constexpr auto gridOrBlock{parenthesized(nonemptyList(starOrExpr)) ||
+    applyFunction(singletonList<CUFKernelDoConstruct::StarOrExpr>, starOrExpr)};
 TYPE_PARSER(sourced(beginDirective >> "$CUF KERNEL DO"_tok >>
     construct<CUFKernelDoConstruct::Directive>(
-        maybe(parenthesized(scalarIntConstantExpr)),
-        "<<<" >>
-            ("*" >> pure<std::list<ScalarIntExpr>>() ||
-                parenthesized(nonemptyList(scalarIntExpr)) ||
-                applyFunction(singletonList<ScalarIntExpr>, scalarIntExpr)),
-        "," >> ("*" >> pure<std::list<ScalarIntExpr>>() ||
-                   parenthesized(nonemptyList(scalarIntExpr)) ||
-                   applyFunction(singletonList<ScalarIntExpr>, scalarIntExpr)),
+        maybe(parenthesized(scalarIntConstantExpr)), "<<<" >> gridOrBlock,
+        "," >> gridOrBlock,
         maybe((", 0 ,"_tok || ", STREAM ="_tok) >> scalarIntExpr) / ">>>" /
             endDirective)))
 TYPE_CONTEXT_PARSER("!$CUF KERNEL DO construct"_en_US,
diff --git a/flang/lib/Parser/misc-parsers.h b/flang/lib/Parser/misc-parsers.h
index e9b52b7..4a318e0 100644
--- a/flang/lib/Parser/misc-parsers.h
+++ b/flang/lib/Parser/misc-parsers.h
@@ -57,5 +57,10 @@ template <typename A> common::IfNoLvalue<std::list<A>, A> singletonList(A &&x) {
   result.emplace_back(std::move(x));
   return result;
 }
+
+template <typename A>
+common::IfNoLvalue<std::optional<A>, A> presentOptional(A &&x) {
+  return std::make_optional(std::move(x));
+}
 } // namespace Fortran::parser
 #endif
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 600aa01..baba486 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2729,6 +2729,13 @@ public:
   WALK_NESTED_ENUM(OmpOrderModifier, Kind) // OMP order-modifier
 #undef WALK_NESTED_ENUM
 
+  void Unparse(const CUFKernelDoConstruct::StarOrExpr &x) {
+    if (x.v) {
+      Walk(*x.v);
+    } else {
+      Word("*");
+    }
+  }
   void Unparse(const CUFKernelDoConstruct::Directive &x) {
     Word("!$CUF KERNEL DO");
     Walk(" (", std::get<std::optional<ScalarIntConstantExpr>>(x.t), ")");
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 215a3c9..6d58013 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -487,6 +487,9 @@ public:
       const auto namePair{
           currScope().try_emplace(name->source, Attrs{}, ProcEntityDetails{})};
       auto &newSymbol{*namePair.first->second};
+      if (context_.intrinsics().IsIntrinsic(name->ToString())) {
+        newSymbol.attrs().set(Attr::INTRINSIC);
+      }
       name->symbol = &newSymbol;
     };
     if (const auto *procD{parser::Unwrap<parser::ProcedureDesignator>(opr.u)}) {
diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp
index 9845a19..15ea34c 100644
--- a/flang/lib/Semantics/runtime-type-info.cpp
+++ b/flang/lib/Semantics/runtime-type-info.cpp
@@ -1239,6 +1239,16 @@ void RuntimeTableBuilder::IncorporateDefinedIoGenericInterfaces(
 RuntimeDerivedTypeTables BuildRuntimeDerivedTypeTables(
     SemanticsContext &context) {
   RuntimeDerivedTypeTables result;
+  // Do not attempt to read __fortran_type_info.mod when compiling
+  // the module on which it depends.
+  const auto &allSources{context.allCookedSources().allSources()};
+  if (auto firstProv{allSources.GetFirstFileProvenance()}) {
+    if (const auto *srcFile{allSources.GetSourceFile(firstProv->start())}) {
+      if (srcFile->path().find("__fortran_builtins.f90") != std::string::npos) {
+        return result;
+      }
+    }
+  }
   result.schemata = context.GetBuiltinModule(typeInfoBuiltinModule);
   if (result.schemata) {
     RuntimeTableBuilder builder{context, result};
diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90
index 62b2010..3d3dbef 100644
--- a/flang/module/__fortran_builtins.f90
+++ b/flang/module/__fortran_builtins.f90
@@ -110,7 +110,7 @@ module __fortran_builtins
   public :: operator(==)
 
   interface operator(/=)
-    module procedure __builtin_c_ptr_eq
+    module procedure __builtin_c_ptr_ne
   end interface
   public :: operator(/=)
 
diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index b474c8c..7267540 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -9,6 +9,7 @@
 #include "edit-output.h"
 #include "emit-encoded.h"
 #include "utf.h"
+#include "flang/Common/real.h"
 #include "flang/Common/uint128.h"
 #include <algorithm>
 
@@ -700,7 +701,9 @@ bool RealOutputEditing<KIND>::EditEXOutput(const DataEdit &edit) {
   if ((editWidth == 0 && !edit.digits) || editDigits == 0) {
     // EX0 or EXw.0
     flags |= decimal::Minimize;
-    significantDigits = 28; // enough for 128-bit F.P.
+    static constexpr int maxSigHexDigits{
+        (common::PrecisionOfRealKind(16) + 3) / 4};
+    significantDigits = maxSigHexDigits;
   }
   auto converted{
       ConvertToHexadecimal(significantDigits, edit.modes.round, flags)};
diff --git a/flang/runtime/pointer.cpp b/flang/runtime/pointer.cpp
index 08a1223..b01735d 100644
--- a/flang/runtime/pointer.cpp
+++ b/flang/runtime/pointer.cpp
@@ -185,6 +185,7 @@ int RTDEF(PointerDeallocate)(Descriptor &pointer, bool hasStat,
   if (!pointer.IsAllocated()) {
     return ReturnError(terminator, StatBaseNull, errMsg, hasStat);
   }
+#if !defined(RT_DEVICE_COMPILATION)
   if (executionEnvironment.checkPointerDeallocation) {
     // Validate the footer.  This should fail if the pointer doesn't
     // span the entire object, or the object was not allocated as a
@@ -200,6 +201,7 @@ int RTDEF(PointerDeallocate)(Descriptor &pointer, bool hasStat,
           terminator, StatBadPointerDeallocation, errMsg, hasStat);
     }
   }
+#endif
   return ReturnError(terminator,
       pointer.Destroy(/*finalize=*/true, /*destroyPointers=*/true, &terminator),
       errMsg, hasStat);
diff --git a/flang/test/Fir/omp-reduction-embox-codegen.fir b/flang/test/Fir/omp-reduction-embox-codegen.fir
new file mode 100644
index 0000000..24bde53
--- /dev/null
+++ b/flang/test/Fir/omp-reduction-embox-codegen.fir
@@ -0,0 +1,36 @@
+// RUN: tco %s | FileCheck %s
+
+// the fir.embox in the init region is turned into an alloca for the box. Test
+// that CodeGen.cpp knows where to place an alloca when it is inside of an
+// omp.reduction.declare
+
+// regretably this has to be nonsense IR because we need the subsequent patches
+// to process anything useful
+
+omp.reduction.declare @test_reduction : !fir.ref<!fir.box<i32>> init {
+^bb0(%arg0: !fir.ref<!fir.box<i32>>):
+  %0 = fir.alloca !fir.box<i32>
+  %1 = fir.alloca i32
+  %2 = fir.embox %1 : (!fir.ref<i32>) -> !fir.box<i32>
+
+  // use the embox for something so it isn't removed
+  fir.store %2 to %0 : !fir.ref<!fir.box<i32>>
+
+  omp.yield(%0 : !fir.ref<!fir.box<i32>>)
+} combiner {
+^bb0(%arg0: !fir.ref<!fir.box<i32>>, %arg1: !fir.ref<!fir.box<i32>>):
+  %0 = fir.undefined !fir.ref<!fir.box<i32>>
+  omp.yield(%0 : !fir.ref<!fir.box<i32>>)
+}
+
+func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+  %4 = fir.alloca !fir.box<i32>
+  omp.parallel byref reduction(@test_reduction %4 -> %arg0 : !fir.ref<!fir.box<i32>>) {
+    omp.terminator
+  }
+  return
+}
+
+// basically we are testing that there isn't a crash
+// CHECK-LABEL: define void @_QQmain
+// CHECK-NEXT:    alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
diff --git a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
index c1e89d1..55b5246e 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
@@ -18,15 +18,17 @@ contains
 ! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 
     call dev_kernel0<<<10, 20>>>()
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
+! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
 
-    call dev_kernel0<<< __builtin_dim3(1,1), __builtin_dim3(32,1,1) >>>
+    call dev_kernel0<<< __builtin_dim3(1,1,4), __builtin_dim3(32,1,1) >>>
 ! CHECK: %[[ADDR_DIM3_GRID:.*]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_dim3.{{.*}}) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
 ! CHECK: %[[DIM3_GRID:.*]]:2 = hlfir.declare %[[ADDR_DIM3_GRID]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_dim3.0"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
 ! CHECK: %[[GRID_X:.*]] = hlfir.designate %[[DIM3_GRID]]#1{"x"}   : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
 ! CHECK: %[[GRID_X_LOAD:.*]] = fir.load %[[GRID_X]] : !fir.ref<i32>
 ! CHECK: %[[GRID_Y:.*]] = hlfir.designate %[[DIM3_GRID]]#1{"y"}   : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
 ! CHECK: %[[GRID_Y_LOAD:.*]] = fir.load %[[GRID_Y]] : !fir.ref<i32>
+! CHECK: %[[GRID_Z:.*]] = hlfir.designate %[[DIM3_GRID]]#1{"z"}   : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
+! CHECK: %[[GRID_Z_LOAD:.*]] = fir.load %[[GRID_Z]] : !fir.ref<i32>
 ! CHECK: %[[ADDR_DIM3_BLOCK:.*]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_dim3.{{.*}}) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
 ! CHECK: %[[DIM3_BLOCK:.*]]:2 = hlfir.declare %[[ADDR_DIM3_BLOCK]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_dim3.1"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
 ! CHECK: %[[BLOCK_X:.*]] = hlfir.designate %[[DIM3_BLOCK]]#1{"x"}   : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
@@ -35,16 +37,16 @@ contains
 ! CHECK: %[[BLOCK_Y_LOAD:.*]] = fir.load %[[BLOCK_Y]] : !fir.ref<i32>
 ! CHECK: %[[BLOCK_Z:.*]] = hlfir.designate %[[DIM3_BLOCK]]#1{"z"}   : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
 ! CHECK: %[[BLOCK_Z_LOAD:.*]] = fir.load %[[BLOCK_Z]] : !fir.ref<i32>
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
+! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
 
     call dev_kernel0<<<10, 20, 2>>>()
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>() 
+! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>() 
 
     call dev_kernel0<<<10, 20, 2, 0>>>()
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
+! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
 
     call dev_kernel1<<<1, 32>>>(a)
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1 : !fir.ref<f32>)
+! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1 : !fir.ref<f32>)
   end
 
 end
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index db628fe..c017561 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -42,10 +42,7 @@ subroutine sub1()
 ! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
 ! CHECK: {n = 2 : i64}
 
-! TODO: currently these trigger error in the parser
+! TODO: lowering for these cases
 ! !$cuf kernel do(2) <<< (1,*), (256,1) >>>
 ! !$cuf kernel do(2) <<< (*,*), (32,4) >>>
 end
-
-
-
diff --git a/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 b/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90
new file mode 100644
index 0000000..3846873
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90
@@ -0,0 +1,56 @@
+! Test C_PTR_EQ and C_PTR_NE lowering.
+! RUN: bbc -emit-hlfir -o - %s | FileCheck %s
+
+function test_c_ptr_eq(ptr1, ptr2)
+  use, intrinsic :: iso_c_binding
+  type(c_ptr), intent(in) :: ptr1, ptr2
+  logical :: test_c_ptr_eq
+  test_c_ptr_eq = (ptr1 .eq. ptr2)
+end
+
+! CHECK-LABEL: func.func @_QPtest_c_ptr_eq(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> {fir.bindc_name = "ptr1"}, %[[ARG1:.*]]: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> {fir.bindc_name = "ptr2"}) -> !fir.logical<4> {
+! CHECK: %[[DECL_ARG0:.*]]:2 = hlfir.declare %[[ARG0]] {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_c_ptr_eqEptr1"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+! CHECK: %[[DECL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_c_ptr_eqEptr2"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_c_ptr_eq", uniq_name = "_QFtest_c_ptr_eqEtest_c_ptr_eq"}
+! CHECK: %[[DECL_RET:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFtest_c_ptr_eqEtest_c_ptr_eq"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[ADDRESS0:.*]] = fir.load %[[COORD_ADDRESS0]] : !fir.ref<i64>
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[ADDRESS1:.*]] = fir.load %[[COORD_ADDRESS1]] : !fir.ref<i64>
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[ADDRESS0]], %[[ADDRESS1]] : i64
+! CHECK: %[[RES:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4>
+! CHECK: %[[NO_REASSOC_RES:.*]] = hlfir.no_reassoc %[[RES]] : !fir.logical<4>
+! CHECK: hlfir.assign %[[NO_REASSOC_RES]] to %[[DECL_RET]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[LOAD_RET:.*]] = fir.load %[[DECL_RET]]#1 : !fir.ref<!fir.logical<4>>
+! CHECK: return %[[LOAD_RET]] : !fir.logical<4>
+! CHECK: }
+
+function test_c_ptr_ne(ptr1, ptr2)
+  use, intrinsic :: iso_c_binding
+  type(c_ptr), intent(in) :: ptr1, ptr2
+  logical :: test_c_ptr_ne
+  test_c_ptr_ne = (ptr1 .ne. ptr2)
+end
+
+! CHECK-LABEL: func.func @_QPtest_c_ptr_ne(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> {fir.bindc_name = "ptr1"}, %[[ARG1:.*]]: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> {fir.bindc_name = "ptr2"}) -> !fir.logical<4> {
+! CHECK: %[[DECL_ARG0:.*]]:2 = hlfir.declare %[[ARG0]] {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_c_ptr_neEptr1"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+! CHECK: %[[DECL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_c_ptr_neEptr2"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_c_ptr_ne", uniq_name = "_QFtest_c_ptr_neEtest_c_ptr_ne"}
+! CHECK: %[[DECL_RET:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFtest_c_ptr_neEtest_c_ptr_ne"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[ADDRESS0:.*]] = fir.load %[[COORD_ADDRESS0]] : !fir.ref<i64>
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[ADDRESS1:.*]] = fir.load %[[COORD_ADDRESS1]] : !fir.ref<i64>
+! CHECK: %[[CMP:.*]] = arith.cmpi ne, %[[ADDRESS0]], %[[ADDRESS1]] : i64
+! CHECK: %[[RES:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4>
+! CHECK: %[[NO_REASSOC_RES:.*]] = hlfir.no_reassoc %[[RES]] : !fir.logical<4>
+! CHECK: hlfir.assign %[[NO_REASSOC_RES]] to %[[DECL_RET]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[LOAD_RET:.*]] = fir.load %[[DECL_RET]]#1 : !fir.ref<!fir.logical<4>>
+! CHECK: return %[[LOAD_RET]] : !fir.logical<4>
+! CHECK: }
diff --git a/flang/test/Lower/Intrinsics/modulo.f90 b/flang/test/Lower/Intrinsics/modulo.f90
index 001e307..383cb34 100644
--- a/flang/test/Lower/Intrinsics/modulo.f90
+++ b/flang/test/Lower/Intrinsics/modulo.f90
@@ -40,17 +40,6 @@ end subroutine
 ! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f128>{{.*}}, %[[arg1:.*]]: !fir.ref<f128>{{.*}}, %[[arg2:.*]]: !fir.ref<f128>{{.*}}) {
 subroutine modulo_testr16(r, a, p)
   real(16) :: r, a, p
-  ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f128>
-  ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f128>
-  ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f128
-  ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f128
-  ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f128
-  ! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f128
-  ! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f128
-  ! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
-  ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
-  ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f128
-  ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f128
-  ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f128>
+  ! CHECK: fir.call @_FortranAModuloReal16({{.*}}){{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
   r = modulo(a, p)
 end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-rename.f90 b/flang/test/Lower/OpenMP/parallel-reduction-rename.f90
new file mode 100644
index 0000000..86db2ff
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-rename.f90
@@ -0,0 +1,36 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
+
+module m1
+  intrinsic max
+end module m1
+program main
+  use m1, ren=>max
+  n=0
+  !$omp parallel reduction(ren:n)
+  print *, "par"
+  !$omp end parallel
+end program main
+
+! test that we understood that this should be a max reduction
+
+! CHECK-LABEL:   omp.reduction.declare @max_i_32 : i32 init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: i32):
+! CHECK:           %[[VAL_1:.*]] = arith.constant -2147483648 : i32
+! CHECK:           omp.yield(%[[VAL_1]] : i32)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32):
+! CHECK:           %[[VAL_2:.*]] = arith.maxsi %[[VAL_0]], %[[VAL_1]] : i32
+! CHECK:           omp.yield(%[[VAL_2]] : i32)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFEn"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_2]] to %[[VAL_1]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel reduction(@max_i_32 %[[VAL_1]]#0 -> %[[VAL_3:.*]] : !fir.ref<i32>) {
+! ...
+! CHECK:             omp.terminator
+
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
index 9289973..86fb067 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
@@ -17,8 +17,16 @@ print *,r
 
 end program
 
-! TODO: the reduction is not curently lowered correctly. This test is checking
-! that we do not crash and we still produce the same broken IR as before.
+! CHECK-LABEL:   omp.reduction.declare @min_i_32 : i32 init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: i32):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 2147483647 : i32
+! CHECK:           omp.yield(%[[VAL_1]] : i32)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32):
+! CHECK:           %[[VAL_2:.*]] = arith.minsi %[[VAL_0]], %[[VAL_1]] : i32
+! CHECK:           omp.yield(%[[VAL_2]] : i32)
+! CHECK:         }
 
 ! CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
 ! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<i32>
@@ -31,10 +39,11 @@ end program
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop  for  (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_9]] to %[[VAL_5]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
-! CHECK:               hlfir.assign %[[VAL_10]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.wsloop reduction(@min_i_32 %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref<i32>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_12]] to %[[VAL_11]]#0 : i32, !fir.ref<i32>
 ! CHECK:               omp.yield
 ! CHECK:             }
 ! CHECK:             omp.terminator
diff --git a/flang/test/Parser/cuf-sanity-tree.CUF b/flang/test/Parser/cuf-sanity-tree.CUF
index f6cf9bb..dc12759 100644
--- a/flang/test/Parser/cuf-sanity-tree.CUF
+++ b/flang/test/Parser/cuf-sanity-tree.CUF
@@ -144,11 +144,11 @@ include "cuf-sanity-common"
 !CHECK: | | | | | | EndDoStmt -> 
 !CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> CUFKernelDoConstruct
 !CHECK: | | | | | Directive
-!CHECK: | | | | | | Scalar -> Integer -> Expr = '1_4'
+!CHECK: | | | | | | StarOrExpr -> Scalar -> Integer -> Expr = '1_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
-!CHECK: | | | | | | Scalar -> Integer -> Expr = '2_4'
+!CHECK: | | | | | | StarOrExpr -> Scalar -> Integer -> Expr = '2_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
-!CHECK: | | | | | | Scalar -> Integer -> Expr = '3_4'
+!CHECK: | | | | | | StarOrExpr -> Scalar -> Integer -> Expr = '3_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '3'
 !CHECK: | | | | | | Scalar -> Integer -> Expr = '1_4'
 !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
diff --git a/flang/test/Semantics/OpenMP/reduction11.f90 b/flang/test/Semantics/OpenMP/reduction11.f90
new file mode 100644
index 0000000..3893fe7
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/reduction11.f90
@@ -0,0 +1,22 @@
+! RUN: %flang_fc1 -fopenmp -fdebug-dump-symbols -o - %s 2>&1 | FileCheck %s
+! Check intrinsic reduction symbols (in this case "max" are marked as INTRINSIC
+
+! CHECK: MainProgram scope: omp_reduction
+program omp_reduction
+  ! CHECK: i size=4 offset=0: ObjectEntity type: INTEGER(4)
+  integer i
+  ! CHECK: k size=4 offset=4: ObjectEntity type: INTEGER(4) init:10_4
+  integer :: k = 10
+  ! CHECK: m size=4 offset=8: ObjectEntity type: INTEGER(4) init:12_4
+  integer :: m = 12
+
+  ! CHECK: OtherConstruct scope
+  ! CHECK: i (OmpPrivate, OmpPreDetermined): HostAssoc
+  ! CHECK: k (OmpReduction): HostAssoc
+  ! CHECK: max, INTRINSIC: ProcEntity
+  !$omp parallel do  reduction(max:k)
+  do i=1,10
+    k = i
+  end do
+  !$omp end parallel do
+end program omp_reduction
diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf
index dd70c3b..4bc9313 100644
--- a/flang/test/Semantics/cuf09.cuf
+++ b/flang/test/Semantics/cuf09.cuf
@@ -10,6 +10,15 @@ module m
 end
 
 program main
+  !$cuf kernel do <<< *, * >>> ! ok
+  do j = 1, 0
+  end do
+  !$cuf kernel do <<< (*), (*) >>> ! ok
+  do j = 1, 0
+  end do
+  !$cuf kernel do <<< (1,*), (2,*) >>> ! ok
+  do j = 1, 0
+  end do
   !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops
   !$cuf kernel do <<< 1, 2 >>>
   do while (.false.)
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index ba6c664..a249583 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -63,7 +63,7 @@ if (NOT CMAKE_CROSSCOMPILING)
       COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
       COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
         ${FLANG_SOURCE_DIR}/module/${filename}.f90
-      DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${depends}
+      DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
     )
     add_custom_command(OUTPUT ${base}.f18.mod
       DEPENDS ${base}.mod
diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td
index 80d0e0b..d24c92e 100644
--- a/libc/config/baremetal/api.td
+++ b/libc/config/baremetal/api.td
@@ -52,6 +52,14 @@ def IntTypesAPI : PublicAPI<"inttypes.h"> {
   let Types = ["imaxdiv_t"];
 }
 
+def MathAPI : PublicAPI<"math.h"> {
+  let Types = ["double_t", "float_t"];
+}
+
+def StdIOAPI : PublicAPI<"stdio.h"> {
+  let Types = ["size_t"];
+}
+
 def StdlibAPI : PublicAPI<"stdlib.h"> {
   let Types = [
     "div_t",
@@ -60,7 +68,7 @@ def StdlibAPI : PublicAPI<"stdlib.h"> {
     "size_t",
     "__bsearchcompare_t",
     "__qsortcompare_t",
-    "__atexithandler_t",
+    "__qsortrcompare_t",
   ];
 }
 
diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td
index 607b8b6d..adaf5bf 100644
--- a/libc/config/gpu/api.td
+++ b/libc/config/gpu/api.td
@@ -63,7 +63,6 @@ def StdIOAPI : PublicAPI<"stdio.h"> {
     SimpleMacroDef<"_IOFBF", "0">,
     SimpleMacroDef<"_IOLBF", "1">,
     SimpleMacroDef<"_IONBF", "2">,
-    SimpleMacroDef<"EOF", "-1">,
   ];
   let Types = ["size_t", "FILE"];
 }
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 7d69099..43c9e81 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -468,6 +468,7 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.lrintf128
     libc.src.math.lroundf128
     libc.src.math.modff128
+    libc.src.math.nanf128
     libc.src.math.nextafterf128
     libc.src.math.rintf128
     libc.src.math.roundf128
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 75432a2a..04d720d 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -76,7 +76,6 @@ def StdIOAPI : PublicAPI<"stdio.h"> {
     SimpleMacroDef<"_IOFBF", "0">,
     SimpleMacroDef<"_IOLBF", "1">,
     SimpleMacroDef<"_IONBF", "2">,
-    SimpleMacroDef<"EOF", "-1">,
   ];
   let Types = ["size_t", "FILE", "cookie_io_functions_t"];
 }
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index b1c9dd0..99ef84d 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -476,6 +476,7 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.lrintf128
     libc.src.math.lroundf128
     libc.src.math.modff128
+    libc.src.math.nanf128
     libc.src.math.nextafterf128
     libc.src.math.rintf128
     libc.src.math.roundf128
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 4fb31c5..7e13a7c 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -429,6 +429,10 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.nexttoward
     libc.src.math.nexttowardf
     libc.src.math.nexttowardl
+    libc.src.math.nextdown
+    libc.src.math.nextdownf
+    libc.src.math.nextup
+    libc.src.math.nextupf
     libc.src.math.powf
     libc.src.math.remainderf
     libc.src.math.remainder
@@ -481,7 +485,10 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.lrintf128
     libc.src.math.lroundf128
     libc.src.math.modff128
+    libc.src.math.nanf128
     libc.src.math.nextafterf128
+    libc.src.math.nextdownf128
+    libc.src.math.nextupf128
     libc.src.math.rintf128
     libc.src.math.roundf128
     libc.src.math.sqrtf128
diff --git a/libc/docs/fullbuild_mode.rst b/libc/docs/fullbuild_mode.rst
index 0af923d..b115101 100644
--- a/libc/docs/fullbuild_mode.rst
+++ b/libc/docs/fullbuild_mode.rst
@@ -6,7 +6,7 @@ Fullbuild Mode
 
 The *fullbuild* mode of LLVM's libc is the mode in which it is to be used as
 the only libc (as opposed to the :ref:`overlay_mode` in which it is used along
-with the system libc.) In to order use it as the only libc, one will have to
+with the system libc.) In order to use it as the only libc, one will have to
 build and install not only the static archives like ``libc.a`` from LLVM's libc,
 but also the start-up objects like ``crt1.o`` and the public headers.
 
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index ed54a7d..3240a8c 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -259,6 +259,8 @@ Basic Operations
 +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | nanl         | |check| | |check| | |check| | |check| | |check| |         |         | |check| | |check| | |check| |         |         |
 +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
+| nanf128      | |check| | |check| |         | |check| |         |         |         |         |         |         |         |         |
++--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | nearbyint    | |check| | |check| | |check| | |check| | |check| |         |         | |check| | |check| | |check| |         |         |
 +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | nearbyintf   | |check| | |check| | |check| | |check| | |check| |         |         | |check| | |check| | |check| |         |         |
diff --git a/libc/include/llvm-libc-macros/stdio-macros.h b/libc/include/llvm-libc-macros/stdio-macros.h
index db747c5..4664801 100644
--- a/libc/include/llvm-libc-macros/stdio-macros.h
+++ b/libc/include/llvm-libc-macros/stdio-macros.h
@@ -9,6 +9,10 @@
 #ifndef LLVM_LIBC_MACROS_STDIO_MACROS_H
 #define LLVM_LIBC_MACROS_STDIO_MACROS_H
 
+#ifndef EOF
+#define EOF (-1)
+#endif
+
 #define BUFSIZ 1024
 
 #endif // LLVM_LIBC_MACROS_STDIO_MACROS_H
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index afe01b1..b438fc6 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -536,6 +536,14 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"nexttoward", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<LongDoubleType>]>,
           FunctionSpec<"nexttowardl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
 
+          FunctionSpec<"nextdown", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
+          FunctionSpec<"nextdownf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"nextdownf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
+
+          FunctionSpec<"nextup", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
+          FunctionSpec<"nextupf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"nextupf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
+
           FunctionSpec<"powf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
           FunctionSpec<"pow", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
 
@@ -559,6 +567,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"nanf", RetValSpec<FloatType>, [ArgSpec<ConstCharPtr>]>,
           FunctionSpec<"nan", RetValSpec<DoubleType>, [ArgSpec<ConstCharPtr>]>,
           FunctionSpec<"nanl", RetValSpec<LongDoubleType>, [ArgSpec<ConstCharPtr>]>,
+          GuardedFunctionSpec<"nanf128", RetValSpec<Float128Type>, [ArgSpec<ConstCharPtr>], "LIBC_TYPES_HAS_FLOAT128">,
       ]
   >;
 
diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
index f148d98..1301caa 100644
--- a/libc/src/__support/FPUtil/ManipulationFunctions.h
+++ b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -230,6 +230,30 @@ LIBC_INLINE T nextafter(T from, U to) {
   return from_bits.get_val();
 }
 
+template <bool IsDown, typename T,
+          cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
+LIBC_INLINE constexpr T nextupdown(T x) {
+  constexpr Sign sign = IsDown ? Sign::NEG : Sign::POS;
+
+  FPBits<T> xbits(x);
+  if (xbits.is_nan() || xbits == FPBits<T>::max_normal(sign) ||
+      xbits == FPBits<T>::inf(sign))
+    return x;
+
+  using StorageType = typename FPBits<T>::StorageType;
+  if (x != T(0)) {
+    if (xbits.sign() == sign) {
+      xbits = FPBits<T>(StorageType(xbits.uintval() + 1));
+    } else {
+      xbits = FPBits<T>(StorageType(xbits.uintval() - 1));
+    }
+  } else {
+    xbits = FPBits<T>::min_subnormal(sign);
+  }
+
+  return xbits.get_val();
+}
+
 } // namespace fputil
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index d92d61e..df01e08 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -993,6 +993,68 @@ struct is_big_int<BigInt<Bits, Signed, T>> : cpp::true_type {};
 template <class T>
 LIBC_INLINE_VAR constexpr bool is_big_int_v = is_big_int<T>::value;
 
+// extensions of type traits to include BigInt
+
+// is_integral_or_big_int
+template <typename T>
+struct is_integral_or_big_int
+    : cpp::bool_constant<(cpp::is_integral_v<T> || is_big_int_v<T>)> {};
+
+template <typename T>
+LIBC_INLINE_VAR constexpr bool is_integral_or_big_int_v =
+    is_integral_or_big_int<T>::value;
+
+// make_big_int_unsigned
+template <typename T> struct make_big_int_unsigned;
+
+template <size_t Bits, bool Signed, typename T>
+struct make_big_int_unsigned<BigInt<Bits, Signed, T>>
+    : cpp::type_identity<BigInt<Bits, false, T>> {};
+
+template <typename T>
+using make_big_int_unsigned_t = typename make_big_int_unsigned<T>::type;
+
+// make_big_int_signed
+template <typename T> struct make_big_int_signed;
+
+template <size_t Bits, bool Signed, typename T>
+struct make_big_int_signed<BigInt<Bits, Signed, T>>
+    : cpp::type_identity<BigInt<Bits, true, T>> {};
+
+template <typename T>
+using make_big_int_signed_t = typename make_big_int_signed<T>::type;
+
+// make_integral_or_big_int_unsigned
+template <typename T, class = void> struct make_integral_or_big_int_unsigned;
+
+template <typename T>
+struct make_integral_or_big_int_unsigned<
+    T, cpp::enable_if_t<cpp::is_integral_v<T>>> : cpp::make_unsigned<T> {};
+
+template <typename T>
+struct make_integral_or_big_int_unsigned<T, cpp::enable_if_t<is_big_int_v<T>>>
+    : make_big_int_unsigned<T> {};
+
+template <typename T>
+using make_integral_or_big_int_unsigned_t =
+    typename make_integral_or_big_int_unsigned<T>::type;
+
+// make_integral_or_big_int_signed
+template <typename T, class = void> struct make_integral_or_big_int_signed;
+
+template <typename T>
+struct make_integral_or_big_int_signed<T,
+                                       cpp::enable_if_t<cpp::is_integral_v<T>>>
+    : cpp::make_signed<T> {};
+
+template <typename T>
+struct make_integral_or_big_int_signed<T, cpp::enable_if_t<is_big_int_v<T>>>
+    : make_big_int_signed<T> {};
+
+template <typename T>
+using make_integral_or_big_int_signed_t =
+    typename make_integral_or_big_int_signed<T>::type;
+
 namespace cpp {
 
 // Specialization of cpp::bit_cast ('bit.h') from T to BigInt.
diff --git a/libc/src/__support/blockstore.h b/libc/src/__support/blockstore.h
index dc5fdd1..ac0eb22 100644
--- a/libc/src/__support/blockstore.h
+++ b/libc/src/__support/blockstore.h
@@ -16,7 +16,6 @@
 #include <stdint.h>
 
 namespace LIBC_NAMESPACE {
-namespace cpp {
 
 // The difference between BlockStore a traditional vector types is that,
 // when more capacity is desired, a new block is added instead of allocating
@@ -203,7 +202,6 @@ void BlockStore<T, BLOCK_SIZE, REVERSE_ORDER>::destroy(
 template <typename T, size_t BLOCK_SIZE>
 using ReverseOrderBlockStore = BlockStore<T, BLOCK_SIZE, true>;
 
-} // namespace cpp
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_SRC___SUPPORT_BLOCKSTORE_H
diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h
index ac0bdd6..f72d00d 100644
--- a/libc/src/__support/integer_to_string.h
+++ b/libc/src/__support/integer_to_string.h
@@ -67,7 +67,7 @@
 #include "src/__support/CPP/span.h"
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/CPP/type_traits.h"
-#include "src/__support/UInt.h" // is_big_int
+#include "src/__support/UInt.h" // make_integral_or_big_int_unsigned_t
 #include "src/__support/common.h"
 
 namespace LIBC_NAMESPACE {
@@ -150,18 +150,6 @@ public:
 using StringBufferWriter = StringBufferWriterImpl<true>;
 using BackwardStringBufferWriter = StringBufferWriterImpl<false>;
 
-template <typename T, class = void> struct IntegerWriterUnsigned {};
-
-template <typename T>
-struct IntegerWriterUnsigned<T, cpp::enable_if_t<cpp::is_integral_v<T>>> {
-  using type = cpp::make_unsigned_t<T>;
-};
-
-template <typename T>
-struct IntegerWriterUnsigned<T, cpp::enable_if_t<is_big_int_v<T>>> {
-  using type = typename T::unsigned_type;
-};
-
 } // namespace details
 
 namespace radix {
@@ -222,7 +210,7 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
   // An internal stateless structure that handles the number formatting logic.
   struct IntegerWriter {
     static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
-    using UNSIGNED_T = typename details::IntegerWriterUnsigned<T>::type;
+    using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
 
     LIBC_INLINE static char digit_char(uint8_t digit) {
       if (digit < 10)
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 073e1dc..2cf2cfb 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -1056,17 +1056,17 @@ hexadecimal_string_to_float(const char *__restrict src,
   return output;
 }
 
-LIBC_INLINE uint64_t
+template <class T>
+LIBC_INLINE typename fputil::FPBits<T>::StorageType
 nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) {
-  uint64_t nan_mantissa = 0;
+  using FPBits = typename fputil::FPBits<T>;
+  using StorageType = typename FPBits::StorageType;
+
+  StorageType nan_mantissa = 0;
 
   if (ncharseq.data() != nullptr && isdigit(ncharseq[0])) {
-    // This is to prevent errors when StorageType is larger than 64
-    // bits, since strtointeger only supports up to 64 bits. This is
-    // actually more than is required by the specification, which says
-    // for the input type "NAN(n-char-sequence)" that "the meaning of
-    // the n-char sequence is implementation-defined."
-    auto strtoint_result = strtointeger<uint64_t>(ncharseq.data(), 0);
+    StrToNumResult<StorageType> strtoint_result =
+        strtointeger<StorageType>(ncharseq.data(), 0);
     if (!strtoint_result.has_error())
       nan_mantissa = strtoint_result.value;
 
@@ -1172,9 +1172,8 @@ LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
           ++index;
         if (src[index] == ')') {
           ++index;
-          auto nan_mantissa_result = nan_mantissa_from_ncharseq(
+          nan_mantissa = nan_mantissa_from_ncharseq<T>(
               cpp::string_view(src + (left_paren + 1), index - left_paren - 2));
-          nan_mantissa = static_cast<StorageType>(nan_mantissa_result);
         } else {
           index = left_paren;
         }
@@ -1221,11 +1220,8 @@ template <class T> LIBC_INLINE StrToNumResult<T> strtonan(const char *arg) {
   while (isalnum(arg[index]) || arg[index] == '_')
     ++index;
 
-  if (arg[index] == '\0') {
-    auto nan_mantissa_result =
-        nan_mantissa_from_ncharseq(cpp::string_view(arg, index));
-    nan_mantissa = static_cast<StorageType>(nan_mantissa_result);
-  }
+  if (arg[index] == '\0')
+    nan_mantissa = nan_mantissa_from_ncharseq<T>(cpp::string_view(arg, index));
 
   result = FPBits::quiet_nan(fputil::Sign::POS, nan_mantissa);
   return {result.get_val(), 0, error};
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index b878089..02c71d4 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -11,6 +11,7 @@
 
 #include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/type_traits.h"
+#include "src/__support/UInt128.h"
 #include "src/__support/common.h"
 #include "src/__support/ctype_utils.h"
 #include "src/__support/str_to_num_result.h"
@@ -75,8 +76,12 @@ template <class T>
 LIBC_INLINE StrToNumResult<T>
 strtointeger(const char *__restrict src, int base,
              const size_t src_len = cpp::numeric_limits<size_t>::max()) {
-  // TODO: Rewrite to support numbers longer than long long
-  unsigned long long result = 0;
+  using ResultType = typename cpp::conditional_t<(cpp::is_same_v<T, UInt128> ||
+                                                  cpp::is_same_v<T, Int128>),
+                                                 UInt128, unsigned long long>;
+
+  ResultType result = 0;
+
   bool is_number = false;
   size_t src_cur = 0;
   int error_val = 0;
@@ -101,15 +106,16 @@ strtointeger(const char *__restrict src, int base,
   if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
     src_cur = src_cur + 2;
 
-  constexpr bool IS_UNSIGNED = (cpp::numeric_limits<T>::min() == 0);
+  constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
   const bool is_positive = (result_sign == '+');
-  unsigned long long constexpr NEGATIVE_MAX =
-      !IS_UNSIGNED
-          ? static_cast<unsigned long long>(cpp::numeric_limits<T>::max()) + 1
-          : cpp::numeric_limits<T>::max();
-  unsigned long long const abs_max =
+
+  ResultType constexpr NEGATIVE_MAX =
+      !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
+                   : cpp::numeric_limits<T>::max();
+  ResultType const abs_max =
       (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
-  unsigned long long const abs_max_div_by_base = abs_max / base;
+  ResultType const abs_max_div_by_base = abs_max / base;
+
   while (src_cur < src_len && isalnum(src[src_cur])) {
     int cur_digit = b36_char_to_int(src[src_cur]);
     if (cur_digit >= base)
@@ -149,10 +155,12 @@ strtointeger(const char *__restrict src, int base,
       return {cpp::numeric_limits<T>::min(), str_len, error_val};
   }
 
-  return {is_positive
-              ? static_cast<T>(result)
-              : static_cast<T>(-static_cast<cpp::make_unsigned_t<T>>(result)),
-          str_len, error_val};
+  return {
+      is_positive
+          ? static_cast<T>(result)
+          : static_cast<T>(
+                -static_cast<make_integral_or_big_int_unsigned_t<T>>(result)),
+      str_len, error_val};
 }
 
 } // namespace internal
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 750fd5f..6c82caa 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -190,6 +190,7 @@ add_math_entrypoint_object(modff128)
 add_math_entrypoint_object(nan)
 add_math_entrypoint_object(nanf)
 add_math_entrypoint_object(nanl)
+add_math_entrypoint_object(nanf128)
 
 add_math_entrypoint_object(nearbyint)
 add_math_entrypoint_object(nearbyintf)
@@ -204,6 +205,14 @@ add_math_entrypoint_object(nexttoward)
 add_math_entrypoint_object(nexttowardf)
 add_math_entrypoint_object(nexttowardl)
 
+add_math_entrypoint_object(nextdown)
+add_math_entrypoint_object(nextdownf)
+add_math_entrypoint_object(nextdownf128)
+
+add_math_entrypoint_object(nextup)
+add_math_entrypoint_object(nextupf)
+add_math_entrypoint_object(nextupf128)
+
 add_math_entrypoint_object(pow)
 add_math_entrypoint_object(powf)
 
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 667381d..ca99863 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1781,6 +1781,19 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  nanf128
+  SRCS
+    nanf128.cpp
+  HDRS
+    ../nanf128.h
+  DEPENDS
+    libc.src.__support.str_to_float
+    libc.src.errno.errno
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
   nextafter
   SRCS
     nextafter.cpp
@@ -1866,6 +1879,80 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  nextdown
+  SRCS
+    nextdown.cpp
+  HDRS
+    ../nextdown.h
+  DEPENDS
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  nextdownf
+  SRCS
+    nextdownf.cpp
+  HDRS
+    ../nextdownf.h
+  DEPENDS
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  nextdownf128
+  SRCS
+    nextdownf128.cpp
+  HDRS
+    ../nextdownf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  nextup
+  SRCS
+    nextup.cpp
+  HDRS
+    ../nextup.h
+  DEPENDS
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  nextupf
+  SRCS
+    nextupf.cpp
+  HDRS
+    ../nextupf.h
+  DEPENDS
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  nextupf128
+  SRCS
+    nextupf128.cpp
+  HDRS
+    ../nextupf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
   fmod
   SRCS
     fmod.cpp
diff --git a/libc/src/math/generic/nanf128.cpp b/libc/src/math/generic/nanf128.cpp
new file mode 100644
index 0000000..f087c9f
--- /dev/null
+++ b/libc/src/math/generic/nanf128.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of nanf128 function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nanf128.h"
+#include "src/__support/common.h"
+#include "src/__support/str_to_float.h"
+#include "src/errno/libc_errno.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, nanf128, (const char *arg)) {
+  auto result = internal::strtonan<float128>(arg);
+  if (result.has_error())
+    libc_errno = result.error;
+  return result.value;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextdown.cpp b/libc/src/math/generic/nextdown.cpp
new file mode 100644
index 0000000..51dee48
--- /dev/null
+++ b/libc/src/math/generic/nextdown.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextdown function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextdown.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, nextdown, (double x)) {
+  return fputil::nextupdown</*IsDown=*/true>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextdownf.cpp b/libc/src/math/generic/nextdownf.cpp
new file mode 100644
index 0000000..857b412
--- /dev/null
+++ b/libc/src/math/generic/nextdownf.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextdownf function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextdownf.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, nextdownf, (float x)) {
+  return fputil::nextupdown</*IsDown=*/true>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextdownf128.cpp b/libc/src/math/generic/nextdownf128.cpp
new file mode 100644
index 0000000..2585a13
--- /dev/null
+++ b/libc/src/math/generic/nextdownf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextdownf128 function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextdownf128.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, nextdownf128, (float128 x)) {
+  return fputil::nextupdown</*IsDown=*/true>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextup.cpp b/libc/src/math/generic/nextup.cpp
new file mode 100644
index 0000000..d75a336
--- /dev/null
+++ b/libc/src/math/generic/nextup.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextup function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextup.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, nextup, (double x)) {
+  return fputil::nextupdown</*IsDown=*/false>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextupf.cpp b/libc/src/math/generic/nextupf.cpp
new file mode 100644
index 0000000..3b18dae
--- /dev/null
+++ b/libc/src/math/generic/nextupf.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextupf function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextupf.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, nextupf, (float x)) {
+  return fputil::nextupdown</*IsDown=*/false>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextupf128.cpp b/libc/src/math/generic/nextupf128.cpp
new file mode 100644
index 0000000..7d862c3
--- /dev/null
+++ b/libc/src/math/generic/nextupf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextupf128 function -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextupf128.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, nextupf128, (float128 x)) {
+  return fputil::nextupdown</*IsDown=*/false>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/nanf128.h b/libc/src/math/nanf128.h
new file mode 100644
index 0000000..b06d14e
--- /dev/null
+++ b/libc/src/math/nanf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nanf128 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NANF128_H
+#define LLVM_LIBC_SRC_MATH_NANF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 nanf128(const char *arg);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NANF128_H
diff --git a/libc/src/math/nextdown.h b/libc/src/math/nextdown.h
new file mode 100644
index 0000000..8049b17
--- /dev/null
+++ b/libc/src/math/nextdown.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for nextdown ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTDOWN_H
+#define LLVM_LIBC_SRC_MATH_NEXTDOWN_H
+
+namespace LIBC_NAMESPACE {
+
+double nextdown(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTDOWN_H
diff --git a/libc/src/math/nextdownf.h b/libc/src/math/nextdownf.h
new file mode 100644
index 0000000..0a2f234
--- /dev/null
+++ b/libc/src/math/nextdownf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for nextdownf ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTDOWNF_H
+#define LLVM_LIBC_SRC_MATH_NEXTDOWNF_H
+
+namespace LIBC_NAMESPACE {
+
+float nextdownf(float x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTDOWNF_H
diff --git a/libc/src/math/nextdownf128.h b/libc/src/math/nextdownf128.h
new file mode 100644
index 0000000..0a3043b
--- /dev/null
+++ b/libc/src/math/nextdownf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextdownf128 ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTDOWNF128_H
+#define LLVM_LIBC_SRC_MATH_NEXTDOWNF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 nextdownf128(float128 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTDOWNF128_H
diff --git a/libc/src/math/nextup.h b/libc/src/math/nextup.h
new file mode 100644
index 0000000..97ae822
--- /dev/null
+++ b/libc/src/math/nextup.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for nextup ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTUP_H
+#define LLVM_LIBC_SRC_MATH_NEXTUP_H
+
+namespace LIBC_NAMESPACE {
+
+double nextup(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTUP_H
diff --git a/libc/src/math/nextupf.h b/libc/src/math/nextupf.h
new file mode 100644
index 0000000..ffc0fa1
--- /dev/null
+++ b/libc/src/math/nextupf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for nextupf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTUPF_H
+#define LLVM_LIBC_SRC_MATH_NEXTUPF_H
+
+namespace LIBC_NAMESPACE {
+
+float nextupf(float x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTUPF_H
diff --git a/libc/src/math/nextupf128.h b/libc/src/math/nextupf128.h
new file mode 100644
index 0000000..b442992
--- /dev/null
+++ b/libc/src/math/nextupf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextupf128 --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTUPF128_H
+#define LLVM_LIBC_SRC_MATH_NEXTUPF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 nextupf128(float128 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTUPF128_H
diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp
index 1513b79..741ea4f 100644
--- a/libc/src/stdlib/atexit.cpp
+++ b/libc/src/stdlib/atexit.cpp
@@ -36,7 +36,7 @@ struct AtExitUnit {
 //        mutexes simply passthrough. We will need a lock free stack.
 using ExitCallbackList = FixedVector<AtExitUnit, 64>;
 #elif defined(LIBC_COPT_PUBLIC_PACKAGING)
-using ExitCallbackList = cpp::ReverseOrderBlockStore<AtExitUnit, 32>;
+using ExitCallbackList = ReverseOrderBlockStore<AtExitUnit, 32>;
 #else
 // BlockStore uses dynamic memory allocation. To avoid dynamic memory
 // allocation in tests, we use a fixed size callback list when built for
diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h
index 4525b9e..ae9d674 100644
--- a/libc/test/UnitTest/FPMatcher.h
+++ b/libc/test/UnitTest/FPMatcher.h
@@ -102,8 +102,10 @@ template <typename T> struct FPTest : public Test {
   const T inf = FPBits::inf(Sign::POS).get_val();                              \
   const T neg_inf = FPBits::inf(Sign::NEG).get_val();                          \
   const T min_normal = FPBits::min_normal().get_val();                         \
-  const T max_normal = FPBits::max_normal().get_val();                         \
-  const T min_denormal = FPBits::min_subnormal().get_val();                    \
+  const T max_normal = FPBits::max_normal(Sign::POS).get_val();                \
+  const T neg_max_normal = FPBits::max_normal(Sign::NEG).get_val();            \
+  const T min_denormal = FPBits::min_subnormal(Sign::POS).get_val();           \
+  const T neg_min_denormal = FPBits::min_subnormal(Sign::NEG).get_val();       \
   const T max_denormal = FPBits::max_subnormal().get_val();
 
 #define EXPECT_FP_EQ(expected, actual)                                         \
diff --git a/libc/test/src/__support/blockstore_test.cpp b/libc/test/src/__support/blockstore_test.cpp
index f628572..5fe8fef 100644
--- a/libc/test/src/__support/blockstore_test.cpp
+++ b/libc/test/src/__support/blockstore_test.cpp
@@ -19,7 +19,7 @@ class LlvmLibcBlockStoreTest : public LIBC_NAMESPACE::testing::Test {
 public:
   template <size_t BLOCK_SIZE, size_t ELEMENT_COUNT, bool REVERSE>
   void populate_and_iterate() {
-    LIBC_NAMESPACE::cpp::BlockStore<Element, BLOCK_SIZE, REVERSE> block_store;
+    LIBC_NAMESPACE::BlockStore<Element, BLOCK_SIZE, REVERSE> block_store;
     for (int i = 0; i < int(ELEMENT_COUNT); ++i)
       ASSERT_TRUE(block_store.push_back({i, 2 * i, 3 * unsigned(i)}));
     auto end = block_store.end();
@@ -38,12 +38,12 @@ public:
       }
     }
     ASSERT_EQ(i, int(ELEMENT_COUNT));
-    LIBC_NAMESPACE::cpp::BlockStore<Element, BLOCK_SIZE, REVERSE>::destroy(
+    LIBC_NAMESPACE::BlockStore<Element, BLOCK_SIZE, REVERSE>::destroy(
         &block_store);
   }
 
   template <bool REVERSE> void back_test() {
-    using LIBC_NAMESPACE::cpp::BlockStore;
+    using LIBC_NAMESPACE::BlockStore;
     BlockStore<int, 4, REVERSE> block_store;
     for (int i = 0; i < 20; i++)
       ASSERT_TRUE(block_store.push_back(i));
@@ -53,7 +53,7 @@ public:
   }
 
   template <bool REVERSE> void empty_test() {
-    using LIBC_NAMESPACE::cpp::BlockStore;
+    using LIBC_NAMESPACE::BlockStore;
     BlockStore<int, 2, REVERSE> block_store;
 
     ASSERT_TRUE(block_store.empty());
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 293e65a..72cb4e9 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -1507,6 +1507,22 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  nanf128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nanf128_test.cpp
+  DEPENDS
+    libc.include.math
+    libc.include.signal
+    libc.src.math.nanf128
+    libc.src.__support.FPUtil.fp_bits
+  # FIXME: The nan tests currently have death tests, which aren't supported for
+  # hermetic tests.
+  UNIT_TEST_ONLY
+)
+
+add_fp_unittest(
   nextafter_test
   SUITE
     libc-math-smoke-tests
@@ -1614,6 +1630,90 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  nextdown_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nextdown_test.cpp
+  HDRS
+    NextDownTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.nextdown
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+  nextdownf_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nextdownf_test.cpp
+  HDRS
+    NextDownTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.nextdownf
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+  nextdownf128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nextdownf128_test.cpp
+  HDRS
+    NextDownTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.nextdownf128
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+  nextup_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nextup_test.cpp
+  HDRS
+    NextUpTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.nextup
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+  nextupf_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nextupf_test.cpp
+  HDRS
+    NextUpTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.nextupf
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+  nextupf128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nextupf128_test.cpp
+  HDRS
+    NextUpTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.nextupf128
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
 # TODO(lntue): The current implementation of fputil::general::fma<float> is only
 # correctly rounded for the default rounding mode round-to-nearest tie-to-even.
 add_fp_unittest(
diff --git a/libc/test/src/math/smoke/NextDownTest.h b/libc/test/src/math/smoke/NextDownTest.h
new file mode 100644
index 0000000..c678ab1
--- /dev/null
+++ b/libc/test/src/math/smoke/NextDownTest.h
@@ -0,0 +1,43 @@
+//===-- Utility class to test different flavors of nextdown -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTDOWNTEST_H
+#define LLVM_LIBC_TEST_SRC_MATH_NEXTDOWNTEST_H
+
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+template <typename T>
+class NextDownTestTemplate : public LIBC_NAMESPACE::testing::Test {
+
+  DECLARE_SPECIAL_CONSTANTS(T)
+
+public:
+  typedef T (*NextDownFunc)(T);
+
+  void testNaN(NextDownFunc func) { ASSERT_FP_EQ(func(aNaN), aNaN); }
+
+  void testBoundaries(NextDownFunc func) {
+    ASSERT_FP_EQ(zero, func(min_denormal));
+
+    ASSERT_FP_EQ(neg_min_denormal, func(zero));
+    ASSERT_FP_EQ(neg_min_denormal, func(neg_zero));
+
+    ASSERT_FP_EQ(neg_max_normal, func(neg_max_normal));
+    ASSERT_FP_EQ(neg_inf, func(neg_inf));
+
+    ASSERT_FP_EQ(max_normal, func(inf));
+  }
+};
+
+#define LIST_NEXTDOWN_TESTS(T, func)                                           \
+  using LlvmLibcNextDownTest = NextDownTestTemplate<T>;                        \
+  TEST_F(LlvmLibcNextDownTest, TestNaN) { testNaN(&func); }                    \
+  TEST_F(LlvmLibcNextDownTest, TestBoundaries) { testBoundaries(&func); }
+
+#endif // LLVM_LIBC_TEST_SRC_MATH_NEXTDOWNTEST_H
diff --git a/libc/test/src/math/smoke/NextUpTest.h b/libc/test/src/math/smoke/NextUpTest.h
new file mode 100644
index 0000000..ebbdb5c
--- /dev/null
+++ b/libc/test/src/math/smoke/NextUpTest.h
@@ -0,0 +1,43 @@
+//===-- Utility class to test different flavors of nextup -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTUPTEST_H
+#define LLVM_LIBC_TEST_SRC_MATH_NEXTUPTEST_H
+
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+template <typename T>
+class NextUpTestTemplate : public LIBC_NAMESPACE::testing::Test {
+
+  DECLARE_SPECIAL_CONSTANTS(T)
+
+public:
+  typedef T (*NextUpFunc)(T);
+
+  void testNaN(NextUpFunc func) { ASSERT_FP_EQ(func(aNaN), aNaN); }
+
+  void testBoundaries(NextUpFunc func) {
+    ASSERT_FP_EQ(neg_zero, func(neg_min_denormal));
+
+    ASSERT_FP_EQ(min_denormal, func(zero));
+    ASSERT_FP_EQ(min_denormal, func(neg_zero));
+
+    ASSERT_FP_EQ(max_normal, func(max_normal));
+    ASSERT_FP_EQ(inf, func(inf));
+
+    ASSERT_FP_EQ(neg_max_normal, func(neg_inf));
+  }
+};
+
+#define LIST_NEXTUP_TESTS(T, func)                                             \
+  using LlvmLibcNextUpTest = NextUpTestTemplate<T>;                            \
+  TEST_F(LlvmLibcNextUpTest, TestNaN) { testNaN(&func); }                      \
+  TEST_F(LlvmLibcNextUpTest, TestBoundaries) { testBoundaries(&func); }
+
+#endif // LLVM_LIBC_TEST_SRC_MATH_NEXTUPTEST_H
diff --git a/libc/test/src/math/smoke/nanf128_test.cpp b/libc/test/src/math/smoke/nanf128_test.cpp
new file mode 100644
index 0000000..2a9f57d
--- /dev/null
+++ b/libc/test/src/math/smoke/nanf128_test.cpp
@@ -0,0 +1,60 @@
+//===-- Unittests for nanf128 ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/UInt128.h"
+#include "src/math/nanf128.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+class LlvmLibcNanf128Test : public LIBC_NAMESPACE::testing::Test {
+public:
+  using FPBits128 = LIBC_NAMESPACE::fputil::FPBits<float128>;
+  using StorageType = FPBits128::StorageType;
+
+  const UInt128 QUIET_NAN = FPBits128::quiet_nan().uintval();
+  const UInt128 ONE = UInt128(1);
+
+  void run_test(const char *input_str, StorageType bits) {
+    float128 result = LIBC_NAMESPACE::nanf128(input_str);
+    auto actual_fp = FPBits128(result);
+    auto expected_fp = FPBits128(bits);
+    EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
+  };
+};
+
+TEST_F(LlvmLibcNanf128Test, NCharSeq) {
+  run_test("", QUIET_NAN);
+  run_test("1234", QUIET_NAN | 1234);
+  run_test("0x1234", QUIET_NAN | 0x1234);
+  run_test("2417851639229258349412352", QUIET_NAN | (ONE << 81));
+  run_test("0x200000000000000000000", QUIET_NAN | (ONE << 81));
+  run_test("10384593717069655257060992658440191",
+           QUIET_NAN | FPBits128::SIG_MASK);
+  run_test("0x1ffffffffffffffffffffffffffff", QUIET_NAN | FPBits128::SIG_MASK);
+  run_test("10384593717069655257060992658440192", QUIET_NAN);
+  run_test("0x20000000000000000000000000000", QUIET_NAN);
+  run_test("1a", QUIET_NAN);
+  run_test("10000000000000000000000000000000000000000000000000", QUIET_NAN);
+}
+
+TEST_F(LlvmLibcNanf128Test, RandomString) {
+  run_test(" 1234", QUIET_NAN);
+  run_test("-1234", QUIET_NAN);
+  run_test("asd&f", QUIET_NAN);
+  run_test("123 ", QUIET_NAN);
+  run_test("1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_",
+           QUIET_NAN);
+}
+
+#ifndef LIBC_HAVE_ADDRESS_SANITIZER
+#include <signal.h>
+TEST_F(LlvmLibcNanf128Test, InvalidInput) {
+  EXPECT_DEATH([] { LIBC_NAMESPACE::nanf128(nullptr); }, WITH_SIGNAL(SIGSEGV));
+}
+#endif // LIBC_HAVE_ADDRESS_SANITIZER
diff --git a/libc/test/src/math/smoke/nextdown_test.cpp b/libc/test/src/math/smoke/nextdown_test.cpp
new file mode 100644
index 0000000..6b0f5c6
--- /dev/null
+++ b/libc/test/src/math/smoke/nextdown_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextdown --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextDownTest.h"
+
+#include "src/math/nextdown.h"
+
+LIST_NEXTDOWN_TESTS(double, LIBC_NAMESPACE::nextdown)
diff --git a/libc/test/src/math/smoke/nextdownf128_test.cpp b/libc/test/src/math/smoke/nextdownf128_test.cpp
new file mode 100644
index 0000000..932a8b3
--- /dev/null
+++ b/libc/test/src/math/smoke/nextdownf128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextdownf128 ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextDownTest.h"
+
+#include "src/math/nextdownf128.h"
+
+LIST_NEXTDOWN_TESTS(float128, LIBC_NAMESPACE::nextdownf128)
diff --git a/libc/test/src/math/smoke/nextdownf_test.cpp b/libc/test/src/math/smoke/nextdownf_test.cpp
new file mode 100644
index 0000000..3c05c22
--- /dev/null
+++ b/libc/test/src/math/smoke/nextdownf_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextdownf -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextDownTest.h"
+
+#include "src/math/nextdownf.h"
+
+LIST_NEXTDOWN_TESTS(float, LIBC_NAMESPACE::nextdownf)
diff --git a/libc/test/src/math/smoke/nextup_test.cpp b/libc/test/src/math/smoke/nextup_test.cpp
new file mode 100644
index 0000000..04c73ac
--- /dev/null
+++ b/libc/test/src/math/smoke/nextup_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextup ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextUpTest.h"
+
+#include "src/math/nextup.h"
+
+LIST_NEXTUP_TESTS(double, LIBC_NAMESPACE::nextup)
diff --git a/libc/test/src/math/smoke/nextupf128_test.cpp b/libc/test/src/math/smoke/nextupf128_test.cpp
new file mode 100644
index 0000000..ddd385a
--- /dev/null
+++ b/libc/test/src/math/smoke/nextupf128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextupf128 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextUpTest.h"
+
+#include "src/math/nextupf128.h"
+
+LIST_NEXTUP_TESTS(float128, LIBC_NAMESPACE::nextupf128)
diff --git a/libc/test/src/math/smoke/nextupf_test.cpp b/libc/test/src/math/smoke/nextupf_test.cpp
new file mode 100644
index 0000000..df73bee
--- /dev/null
+++ b/libc/test/src/math/smoke/nextupf_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextupf ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextUpTest.h"
+
+#include "src/math/nextupf.h"
+
+LIST_NEXTUP_TESTS(float, LIBC_NAMESPACE::nextupf)
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index fa1d8e4..18f7794 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -174,6 +174,12 @@ add_custom_command(
 	DEPENDS ${script_loc} )
 add_custom_target( "generate_convert.cl" DEPENDS convert.cl )
 
+add_custom_command(
+	OUTPUT clspv-convert.cl
+	COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
+	DEPENDS ${script_loc} )
+add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
+
 enable_testing()
 
 foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
@@ -218,11 +224,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 	# Add the generated convert.cl here to prevent adding
 	# the one listed in SOURCES
 	if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" )
-		set( rel_files convert.cl )
-		set( objects convert.cl )
 		if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND
 		    NOT ${ARCH} STREQUAL "clspv64" )
+			set( rel_files convert.cl )
+			set( objects convert.cl )
 			list( APPEND rel_files generic/lib/subnormal_use_default.ll )
+		elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64")
+			set( rel_files clspv-convert.cl )
+			set( objects clspv-convert.cl )
 		endif()
 	else()
 		set( rel_files )
@@ -286,6 +295,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 		# multiple invocations
 		add_dependencies( builtins.link.${arch_suffix}
 			generate_convert.cl )
+		add_dependencies( builtins.link.${arch_suffix}
+			clspv-generate_convert.cl )
 		# CMake will turn this include into absolute path
 		target_include_directories( builtins.link.${arch_suffix} PRIVATE
 			"generic/include" )
diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py
index 612a918..21fc8eb 100644
--- a/libclc/generic/lib/gen_convert.py
+++ b/libclc/generic/lib/gen_convert.py
@@ -2,6 +2,7 @@
 #
 # Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
 # Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
+# Copyright (c) 2024 Romaric Jodin <rjodin@chromium.org>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -26,6 +27,16 @@
 #
 # convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
 
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--clspv", action="store_true", help="Generate the clspv variant of the code"
+)
+args = parser.parse_args()
+
+clspv = args.clspv
+
 types = [
     "char",
     "uchar",
@@ -251,13 +262,19 @@ def generate_default_conversion(src, dst, mode):
         print("#endif")
 
 
-for src in types:
-    for dst in types:
-        generate_default_conversion(src, dst, "")
+# Do not generate default conversion for clspv as they are handled natively
+if not clspv:
+    for src in types:
+        for dst in types:
+            generate_default_conversion(src, dst, "")
 
 for src in int_types:
     for dst in int_types:
         for mode in rounding_modes:
+            # Do not generate "_rte" conversion for clspv as they are handled
+            # natively
+            if clspv and mode == "_rte":
+                continue
             generate_default_conversion(src, dst, mode)
 
 #
@@ -304,21 +321,38 @@ def generate_saturated_conversion(src, dst, size):
 
     elif src in float_types:
 
-        # Conversion from float to int
-        print(
-            """  {DST}{N} y = convert_{DST}{N}(x);
-  y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
-  y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
-  return y;""".format(
-                SRC=src,
-                DST=dst,
-                N=size,
-                DST_MIN=limit_min[dst],
-                DST_MAX=limit_max[dst],
-                BP=bool_prefix,
-                BS=bool_suffix,
+        if clspv:
+            # Conversion from float to int
+            print(
+                """  {DST}{N} y = convert_{DST}{N}(x);
+                y = select(y, ({DST}{N}){DST_MIN}, {BP}(x <= ({SRC}{N}){DST_MIN}){BS});
+                y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
+                return y;""".format(
+                    SRC=src,
+                    DST=dst,
+                    N=size,
+                    DST_MIN=limit_min[dst],
+                    DST_MAX=limit_max[dst],
+                    BP=bool_prefix,
+                    BS=bool_suffix,
+                )
+            )
+        else:
+            # Conversion from float to int
+            print(
+                """  {DST}{N} y = convert_{DST}{N}(x);
+                y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
+                y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
+                return y;""".format(
+                    SRC=src,
+                    DST=dst,
+                    N=size,
+                    DST_MIN=limit_min[dst],
+                    DST_MAX=limit_max[dst],
+                    BP=bool_prefix,
+                    BS=bool_suffix,
+                )
             )
-        )
 
     else:
 
@@ -432,7 +466,10 @@ def generate_float_conversion(src, dst, size, mode, sat):
         print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
     else:
         print("  {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
-        print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
+        if clspv:
+            print("  {SRC}{N} y = convert_{SRC}{N}_sat(r);".format(SRC=src, N=size))
+        else:
+            print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
         if mode == "_rtz":
             if src in int_types:
                 print(
@@ -448,11 +485,29 @@ def generate_float_conversion(src, dst, size, mode, sat):
             else:
                 print("  {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
                 print("  {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
-            print(
-                "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
-                    DST=dst, N=size, BOOL=bool_type[dst]
+            if clspv:
+                print(
+                    "  {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);".format(
+                        BOOL=bool_type[dst], N=size
+                    )
+                )
+                if sizeof_type[src] >= 4 and src in int_types:
+                    print(
+                        "  c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
+                            BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
+                        )
+                    )
+                print(
+                    "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
+                        DST=dst, N=size, BOOL=bool_type[dst], SRC=src
+                    )
+                )
+            else:
+                print(
+                    "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
+                        DST=dst, N=size, BOOL=bool_type[dst]
+                    )
                 )
-            )
         if mode == "_rtp":
             print(
                 "  return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
@@ -460,11 +515,29 @@ def generate_float_conversion(src, dst, size, mode, sat):
                 )
             )
         if mode == "_rtn":
-            print(
-                "  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
-                    DST=dst, N=size, BOOL=bool_type[dst]
+            if clspv:
+                print(
+                    "  {BOOL}{N} c = convert_{BOOL}{N}(y > x);".format(
+                        BOOL=bool_type[dst], N=size
+                    )
+                )
+                if sizeof_type[src] >= 4 and src in int_types:
+                    print(
+                        "  c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
+                            BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
+                        )
+                    )
+                print(
+                    "  return select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
+                        DST=dst, N=size, BOOL=bool_type[dst], SRC=src
+                    )
+                )
+            else:
+                print(
+                    "  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
+                        DST=dst, N=size, BOOL=bool_type[dst]
+                    )
                 )
-            )
 
     # Footer
     print("}")
@@ -484,4 +557,8 @@ for src in types:
     for dst in float_types:
         for size in vector_sizes:
             for mode in rounding_modes:
+                # Do not generate "_rte" conversion for clspv as they are
+                # handled natively
+                if clspv and mode == "_rte":
+                    continue
                 generate_float_conversion(src, dst, size, mode, "")
diff --git a/libcxx/docs/Modules.rst b/libcxx/docs/Modules.rst
index ee2b81d..5b027ed 100644
--- a/libcxx/docs/Modules.rst
+++ b/libcxx/docs/Modules.rst
@@ -179,33 +179,12 @@ This is a small sample program that uses the module ``std``. It consists of a
   FetchContent_MakeAvailable(std)
 
   #
-  # Adjust project compiler flags
-  #
-
-  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fprebuilt-module-path=${std_BINARY_DIR}/CMakeFiles/std.dir/>)
-  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fprebuilt-module-path=${std_BINARY_DIR}/CMakeFiles/std.compat.dir/>)
-  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>)
-  # The include path needs to be set to be able to use macros from headers.
-  # For example from, the headers <cassert> and <version>.
-  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-isystem>)
-  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${LIBCXX_BUILD}/include/c++/v1>)
-
-  #
-  # Adjust project linker flags
-  #
-
-  add_link_options($<$<COMPILE_LANGUAGE:CXX>:-nostdlib++>)
-  add_link_options($<$<COMPILE_LANGUAGE:CXX>:-L${LIBCXX_BUILD}/lib>)
-  add_link_options($<$<COMPILE_LANGUAGE:CXX>:-Wl,-rpath,${LIBCXX_BUILD}/lib>)
-  # Linking against the standard c++ library is required for CMake to get the proper dependencies.
-  link_libraries(std c++)
-  link_libraries(std.compat c++)
-
-  #
   # Add the project
   #
 
   add_executable(main)
+  add_dependencies(main std.compat)
+  target_link_libraries(main std.compat)
   target_sources(main
     PRIVATE
       main.cpp
@@ -218,13 +197,9 @@ Building this project is done with the following steps, assuming the files
 
   $ mkdir build
   $ cmake -G Ninja -S . -B build -DCMAKE_CXX_COMPILER=<path-to-compiler> -DLIBCXX_BUILD=<build>
-  $ ninja -j1 std -C build
   $ ninja -C build
   $ build/main
 
-.. note:: The ``std`` dependencies of ``std.compat`` is not always resolved when
-          building the ``std`` target using multiple jobs.
-
 .. warning:: ``<path-to-compiler>`` should point point to the real binary and
              not to a symlink.
 
diff --git a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv
index e003455..43282f3 100644
--- a/libcxx/docs/Status/Cxx23Issues.csv
+++ b/libcxx/docs/Status/Cxx23Issues.csv
@@ -283,7 +283,7 @@
 "`3655 <https://wg21.link/LWG3655>`__","The ``INVOKE`` operation and union types","February 2023","|Complete|","18.0",""
 "`3723 <https://wg21.link/LWG3723>`__","``priority_queue::push_range`` needs to ``append_range``","February 2023","","","|ranges|"
 "`3734 <https://wg21.link/LWG3734>`__","Inconsistency in ``inout_ptr`` and ``out_ptr`` for empty case","February 2023","","",""
-"`3772 <https://wg21.link/LWG3772>`__","``repeat_view``'s ``piecewise`` constructor is missing Postconditions","February 2023","","","|ranges|"
+"`3772 <https://wg21.link/LWG3772>`__","``repeat_view``'s ``piecewise`` constructor is missing Postconditions","February 2023","|Complete|","17.0","|ranges|"
 "`3786 <https://wg21.link/LWG3786>`__","Flat maps' deduction guide needs to default ``Allocator`` to be useful","February 2023","","",""
 "`3803 <https://wg21.link/LWG3803>`__","``flat_foo`` constructors taking ``KeyContainer`` lack ``KeyCompare`` parameter","February 2023","","",""
 "`3810 <https://wg21.link/LWG3810>`__","CTAD for ``std::basic_format_args``","February 2023","|Complete|","17.0","|format|"
diff --git a/libcxx/include/__format/concepts.h b/libcxx/include/__format/concepts.h
index d7b5a9d..13380e9 100644
--- a/libcxx/include/__format/concepts.h
+++ b/libcxx/include/__format/concepts.h
@@ -15,10 +15,12 @@
 #include <__config>
 #include <__format/format_parse_context.h>
 #include <__fwd/format.h>
+#include <__fwd/tuple.h>
+#include <__tuple/tuple_size.h>
 #include <__type_traits/is_specialization.h>
 #include <__type_traits/remove_const.h>
+#include <__type_traits/remove_reference.h>
 #include <__utility/pair.h>
-#include <tuple>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index a466c83..118e83e 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -10,23 +10,20 @@
 #define _LIBCPP___FUNCTIONAL_HASH_H
 
 #include <__config>
-#include <__functional/invoke.h>
 #include <__functional/unary_function.h>
 #include <__fwd/functional.h>
-#include <__tuple/sfinae_helpers.h>
+#include <__type_traits/conjunction.h>
+#include <__type_traits/invoke.h>
 #include <__type_traits/is_copy_constructible.h>
 #include <__type_traits/is_default_constructible.h>
 #include <__type_traits/is_enum.h>
 #include <__type_traits/is_move_constructible.h>
 #include <__type_traits/underlying_type.h>
-#include <__utility/forward.h>
-#include <__utility/move.h>
 #include <__utility/pair.h>
 #include <__utility/swap.h>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
diff --git a/libcxx/include/__tuple/sfinae_helpers.h b/libcxx/include/__tuple/sfinae_helpers.h
index 90e9b1e..35a57ff 100644
--- a/libcxx/include/__tuple/sfinae_helpers.h
+++ b/libcxx/include/__tuple/sfinae_helpers.h
@@ -16,6 +16,7 @@
 #include <__tuple/tuple_like_ext.h>
 #include <__tuple/tuple_size.h>
 #include <__tuple/tuple_types.h>
+#include <__type_traits/conjunction.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/is_constructible.h>
@@ -32,12 +33,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #ifndef _LIBCPP_CXX03_LANG
 
-template <bool... _Preds>
-struct __all_dummy;
-
-template <bool... _Pred>
-struct __all : _IsSame<__all_dummy<_Pred...>, __all_dummy<((void)_Pred, true)...>> {};
-
 struct __tuple_sfinae_base {
   template <template <class, class...> class _Trait, class... _LArgs, class... _RArgs>
   static auto __do_test(__tuple_types<_LArgs...>, __tuple_types<_RArgs...>)
diff --git a/libcxx/include/__tuple/tuple_size.h b/libcxx/include/__tuple/tuple_size.h
index b832010..668be13 100644
--- a/libcxx/include/__tuple/tuple_size.h
+++ b/libcxx/include/__tuple/tuple_size.h
@@ -43,7 +43,7 @@ struct _LIBCPP_TEMPLATE_VIS tuple_size<__enable_if_tuple_size_imp< volatile _Tp,
 
 template <class _Tp>
 struct _LIBCPP_TEMPLATE_VIS
-    tuple_size<__enable_if_tuple_size_imp< const volatile _Tp, integral_constant<size_t, sizeof(tuple_size<_Tp>)>>>
+    tuple_size<__enable_if_tuple_size_imp<const volatile _Tp, integral_constant<size_t, sizeof(tuple_size<_Tp>)>>>
     : public integral_constant<size_t, tuple_size<_Tp>::value> {};
 
 #else
@@ -63,6 +63,11 @@ struct _LIBCPP_TEMPLATE_VIS tuple_size<tuple<_Tp...> > : public integral_constan
 template <class... _Tp>
 struct _LIBCPP_TEMPLATE_VIS tuple_size<__tuple_types<_Tp...> > : public integral_constant<size_t, sizeof...(_Tp)> {};
 
+#  if _LIBCPP_STD_VER >= 17
+template <class _Tp>
+inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
+#  endif
+
 #endif // _LIBCPP_CXX03_LANG
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__type_traits/conjunction.h b/libcxx/include/__type_traits/conjunction.h
index 4bfa5a2..c299559 100644
--- a/libcxx/include/__type_traits/conjunction.h
+++ b/libcxx/include/__type_traits/conjunction.h
@@ -13,6 +13,7 @@
 #include <__type_traits/conditional.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/integral_constant.h>
+#include <__type_traits/is_same.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -37,6 +38,12 @@ false_type __and_helper(...);
 template <class... _Pred>
 using _And _LIBCPP_NODEBUG = decltype(std::__and_helper<_Pred...>(0));
 
+template <bool... _Preds>
+struct __all_dummy;
+
+template <bool... _Pred>
+struct __all : _IsSame<__all_dummy<_Pred...>, __all_dummy<((void)_Pred, true)...> > {};
+
 #if _LIBCPP_STD_VER >= 17
 
 template <class...>
diff --git a/libcxx/include/array b/libcxx/include/array
index 7fa5dc1..932b1b9 100644
--- a/libcxx/include/array
+++ b/libcxx/include/array
@@ -123,6 +123,7 @@ template <size_t I, class T, size_t N> const T&& get(const array<T, N>&&) noexce
 #include <__iterator/wrap_iter.h>
 #include <__tuple/sfinae_helpers.h>
 #include <__type_traits/conditional.h>
+#include <__type_traits/conjunction.h>
 #include <__type_traits/is_array.h>
 #include <__type_traits/is_const.h>
 #include <__type_traits/is_constructible.h>
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index b3b260c..0320c1d 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -878,6 +878,7 @@ constexpr chrono::year                                  operator ""y(unsigned lo
 #  include <cstring>
 #  include <forward_list>
 #  include <string>
+#  include <tuple>
 #endif
 
 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER == 20
diff --git a/libcxx/include/coroutine b/libcxx/include/coroutine
index 4bd1d4e..b1ba83b 100644
--- a/libcxx/include/coroutine
+++ b/libcxx/include/coroutine
@@ -56,6 +56,7 @@ struct suspend_always;
 
 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
 #  include <iosfwd>
+#  include <limits>
 #  include <type_traits>
 #endif
 
diff --git a/libcxx/include/experimental/memory b/libcxx/include/experimental/memory
index 7e698fe..e9663d4 100644
--- a/libcxx/include/experimental/memory
+++ b/libcxx/include/experimental/memory
@@ -191,4 +191,8 @@ _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_ENABLE_EXPERIMENTAL
 
+#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
+#  include <limits>
+#endif
+
 #endif /* _LIBCPP_EXPERIMENTAL_MEMORY */
diff --git a/libcxx/include/fstream b/libcxx/include/fstream
index 776641b..7a084d1 100644
--- a/libcxx/include/fstream
+++ b/libcxx/include/fstream
@@ -308,6 +308,43 @@ private:
   state_type __st_;
   state_type __st_last_;
   ios_base::openmode __om_;
+  // There have been no file operations yet, which allows setting unbuffered
+  // I/O mode.
+  static const ios_base::openmode __no_io_operations = ios_base::trunc;
+  // Unbuffered I/O mode has been requested.
+  static const ios_base::openmode __use_unbuffered_io = ios_base::ate;
+  // Used to track the currently used mode and track whether the output should
+  // be unbuffered.
+  // [filebuf.virtuals]/12
+  //   If setbuf(0, 0) is called on a stream before any I/O has occurred on
+  //   that stream, the stream becomes unbuffered. Otherwise the results are
+  //   implementation-defined.
+  // This allows calling setbuf(0, 0)
+  // - before opening a file,
+  // - after opening a file, before
+  //   - a read
+  //   - a write
+  //   - a seek.
+  // Note that opening a file with ios_base::ate does a seek operation.
+  // Normally underflow, overflow, and sync change this flag to ios_base::in,
+  // ios_base_out, or 0.
+  //
+  // The ios_base::trunc and ios_base::ate flags are not used in __cm_. They
+  // are used to track the state of the unbuffered request. For readability
+  // they have the aliases __no_io_operations and __use_unbuffered_io
+  // respectively.
+  //
+  // The __no_io_operations and __use_unbuffered_io flags are used in the
+  // following way:
+  // - __no_io_operations is set upon construction to indicate the unbuffered
+  //   state can be set.
+  // - When requesting unbuffered output:
+  //   - If the file is open it sets the mode.
+  //   - Else places a request by adding the __use_unbuffered_io flag.
+  // - When a file is opened it checks whether both __no_io_operations and
+  //   __use_unbuffered_io are set. If so switches to unbuffered mode.
+  // - All file I/O operations change the mode effectively clearing the
+  //   __no_io_operations and __use_unbuffered_io flags.
   ios_base::openmode __cm_;
   bool __owns_eb_;
   bool __owns_ib_;
@@ -327,7 +364,13 @@ private:
       return nullptr;
 
     __om_ = __mode;
+    if (__cm_ == (__no_io_operations | __use_unbuffered_io)) {
+      std::setbuf(__file_, nullptr);
+      __cm_ = 0;
+    }
+
     if (__mode & ios_base::ate) {
+      __cm_ = 0;
       if (fseek(__file_, 0, SEEK_END)) {
         fclose(__file_);
         __file_ = nullptr;
@@ -337,6 +380,20 @@ private:
 
     return this;
   }
+
+  // If the file is already open, switch to unbuffered mode. Otherwise, record
+  // the request to use unbuffered mode so that we use that mode when we
+  // eventually open the file.
+  _LIBCPP_HIDE_FROM_ABI void __request_unbuffered_mode(char_type* __s, streamsize __n) {
+    if (__cm_ == __no_io_operations && __s == nullptr && __n == 0) {
+      if (__file_) {
+        std::setbuf(__file_, nullptr);
+        __cm_ = 0;
+      } else {
+        __cm_ = __no_io_operations | __use_unbuffered_io;
+      }
+    }
+  }
 };
 
 template <class _CharT, class _Traits>
@@ -352,7 +409,7 @@ basic_filebuf<_CharT, _Traits>::basic_filebuf()
       __st_(),
       __st_last_(),
       __om_(0),
-      __cm_(0),
+      __cm_(__no_io_operations),
       __owns_eb_(false),
       __owns_ib_(false),
       __always_noconv_(false) {
@@ -810,6 +867,7 @@ template <class _CharT, class _Traits>
 basic_streambuf<_CharT, _Traits>* basic_filebuf<_CharT, _Traits>::setbuf(char_type* __s, streamsize __n) {
   this->setg(nullptr, nullptr, nullptr);
   this->setp(nullptr, nullptr);
+  __request_unbuffered_mode(__s, __n);
   if (__owns_eb_)
     delete[] __extbuf_;
   if (__owns_ib_)
diff --git a/libcxx/include/limits b/libcxx/include/limits
index f15b5b1..f022048 100644
--- a/libcxx/include/limits
+++ b/libcxx/include/limits
@@ -137,9 +137,9 @@ protected:
   typedef _Tp type;
 
   static _LIBCPP_CONSTEXPR const bool is_specialized = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return type(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return type(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return type(); }
 
   static _LIBCPP_CONSTEXPR const int digits       = 0;
   static _LIBCPP_CONSTEXPR const int digits10     = 0;
@@ -148,8 +148,8 @@ protected:
   static _LIBCPP_CONSTEXPR const bool is_integer  = false;
   static _LIBCPP_CONSTEXPR const bool is_exact    = false;
   static _LIBCPP_CONSTEXPR const int radix        = 0;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(); }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = 0;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = 0;
@@ -161,10 +161,10 @@ protected:
   static _LIBCPP_CONSTEXPR const bool has_signaling_NaN                                    = false;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(); }
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = false;
   static _LIBCPP_CONSTEXPR const bool is_bounded = false;
@@ -198,15 +198,15 @@ protected:
   static _LIBCPP_CONSTEXPR const int max_digits10 = 0;
   static _LIBCPP_CONSTEXPR const type __min       = __libcpp_compute_min<type, digits, is_signed>::value;
   static _LIBCPP_CONSTEXPR const type __max       = is_signed ? type(type(~0) ^ __min) : type(~0);
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
 
   static _LIBCPP_CONSTEXPR const bool is_integer = true;
   static _LIBCPP_CONSTEXPR const bool is_exact   = true;
   static _LIBCPP_CONSTEXPR const int radix       = 2;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = 0;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = 0;
@@ -218,10 +218,10 @@ protected:
   static _LIBCPP_CONSTEXPR const bool has_signaling_NaN                                    = false;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = false;
   static _LIBCPP_CONSTEXPR const bool is_bounded = true;
@@ -249,15 +249,15 @@ protected:
   static _LIBCPP_CONSTEXPR const int max_digits10 = 0;
   static _LIBCPP_CONSTEXPR const type __min       = false;
   static _LIBCPP_CONSTEXPR const type __max       = true;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
 
   static _LIBCPP_CONSTEXPR const bool is_integer = true;
   static _LIBCPP_CONSTEXPR const bool is_exact   = true;
   static _LIBCPP_CONSTEXPR const int radix       = 2;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = 0;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = 0;
@@ -269,10 +269,10 @@ protected:
   static _LIBCPP_CONSTEXPR const bool has_signaling_NaN                                    = false;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = false;
   static _LIBCPP_CONSTEXPR const bool is_bounded = true;
@@ -294,15 +294,15 @@ protected:
   static _LIBCPP_CONSTEXPR const int digits       = __FLT_MANT_DIG__;
   static _LIBCPP_CONSTEXPR const int digits10     = __FLT_DIG__;
   static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __FLT_MIN__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __FLT_MAX__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __FLT_MIN__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __FLT_MAX__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
 
   static _LIBCPP_CONSTEXPR const bool is_integer = false;
   static _LIBCPP_CONSTEXPR const bool is_exact   = false;
   static _LIBCPP_CONSTEXPR const int radix       = __FLT_RADIX__;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __FLT_EPSILON__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5F; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __FLT_EPSILON__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5F; }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = __FLT_MIN_EXP__;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = __FLT_MIN_10_EXP__;
@@ -314,10 +314,10 @@ protected:
   static _LIBCPP_CONSTEXPR const bool has_signaling_NaN                                    = true;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_valf(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nanf(""); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nansf(""); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __FLT_DENORM_MIN__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_valf(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nanf(""); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nansf(""); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __FLT_DENORM_MIN__; }
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = true;
   static _LIBCPP_CONSTEXPR const bool is_bounded = true;
@@ -343,15 +343,15 @@ protected:
   static _LIBCPP_CONSTEXPR const int digits       = __DBL_MANT_DIG__;
   static _LIBCPP_CONSTEXPR const int digits10     = __DBL_DIG__;
   static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __DBL_MIN__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __DBL_MAX__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __DBL_MIN__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __DBL_MAX__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
 
   static _LIBCPP_CONSTEXPR const bool is_integer = false;
   static _LIBCPP_CONSTEXPR const bool is_exact   = false;
   static _LIBCPP_CONSTEXPR const int radix       = __FLT_RADIX__;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __DBL_EPSILON__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __DBL_EPSILON__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5; }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = __DBL_MIN_EXP__;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = __DBL_MIN_10_EXP__;
@@ -363,10 +363,10 @@ protected:
   static _LIBCPP_CONSTEXPR const bool has_signaling_NaN                                    = true;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_val(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nan(""); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nans(""); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __DBL_DENORM_MIN__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_val(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nan(""); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nans(""); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __DBL_DENORM_MIN__; }
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = true;
   static _LIBCPP_CONSTEXPR const bool is_bounded = true;
@@ -392,15 +392,15 @@ protected:
   static _LIBCPP_CONSTEXPR const int digits       = __LDBL_MANT_DIG__;
   static _LIBCPP_CONSTEXPR const int digits10     = __LDBL_DIG__;
   static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __LDBL_MIN__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __LDBL_MAX__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __LDBL_MIN__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __LDBL_MAX__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
 
   static _LIBCPP_CONSTEXPR const bool is_integer = false;
   static _LIBCPP_CONSTEXPR const bool is_exact   = false;
   static _LIBCPP_CONSTEXPR const int radix       = __FLT_RADIX__;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __LDBL_EPSILON__; }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5L; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __LDBL_EPSILON__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5L; }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = __LDBL_MIN_EXP__;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = __LDBL_MIN_10_EXP__;
@@ -412,10 +412,10 @@ protected:
   static _LIBCPP_CONSTEXPR const bool has_signaling_NaN                                    = true;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = false;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_vall(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nanl(""); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nansl(""); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __LDBL_DENORM_MIN__; }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_vall(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nanl(""); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nansl(""); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __LDBL_DENORM_MIN__; }
 
 #if defined(__powerpc__) && defined(__LONG_DOUBLE_IBM128__)
   static _LIBCPP_CONSTEXPR const bool is_iec559 = false;
@@ -441,9 +441,9 @@ class _LIBCPP_TEMPLATE_VIS numeric_limits : private __libcpp_numeric_limits<_Tp>
 
 public:
   static _LIBCPP_CONSTEXPR const bool is_specialized = __base::is_specialized;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __base::min(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __base::max(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return __base::lowest(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __base::min(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __base::max(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return __base::lowest(); }
 
   static _LIBCPP_CONSTEXPR const int digits       = __base::digits;
   static _LIBCPP_CONSTEXPR const int digits10     = __base::digits10;
@@ -452,8 +452,8 @@ public:
   static _LIBCPP_CONSTEXPR const bool is_integer  = __base::is_integer;
   static _LIBCPP_CONSTEXPR const bool is_exact    = __base::is_exact;
   static _LIBCPP_CONSTEXPR const int radix        = __base::radix;
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __base::epsilon(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return __base::round_error(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __base::epsilon(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return __base::round_error(); }
 
   static _LIBCPP_CONSTEXPR const int min_exponent   = __base::min_exponent;
   static _LIBCPP_CONSTEXPR const int min_exponent10 = __base::min_exponent10;
@@ -467,10 +467,10 @@ public:
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = __base::has_denorm;
   static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss          = __base::has_denorm_loss;
   _LIBCPP_SUPPRESS_DEPRECATED_POP
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __base::infinity(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __base::quiet_NaN(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __base::signaling_NaN(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __base::denorm_min(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __base::infinity(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __base::quiet_NaN(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __base::signaling_NaN(); }
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __base::denorm_min(); }
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = __base::is_iec559;
   static _LIBCPP_CONSTEXPR const bool is_bounded = __base::is_bounded;
diff --git a/libcxx/include/optional b/libcxx/include/optional
index 99bfd0d..94c5789 100644
--- a/libcxx/include/optional
+++ b/libcxx/include/optional
@@ -1291,6 +1291,7 @@ _LIBCPP_POP_MACROS
 #  include <concepts>
 #  include <ctime>
 #  include <iterator>
+#  include <limits>
 #  include <memory>
 #  include <ratio>
 #  include <stdexcept>
diff --git a/libcxx/include/tuple b/libcxx/include/tuple
index e63e4e2..02ace66 100644
--- a/libcxx/include/tuple
+++ b/libcxx/include/tuple
@@ -1369,9 +1369,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_T1, _T2>::pair(
       second(std::forward<_Args2>(std::get<_I2>(__second_args))...) {}
 
 #  if _LIBCPP_STD_VER >= 17
-template <class _Tp>
-inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
-
 #    define _LIBCPP_NOEXCEPT_RETURN(...)                                                                               \
       noexcept(noexcept(__VA_ARGS__)) { return __VA_ARGS__; }
 
diff --git a/libcxx/include/variant b/libcxx/include/variant
index 59d7f9b..25d47ec 100644
--- a/libcxx/include/variant
+++ b/libcxx/include/variant
@@ -228,6 +228,7 @@ namespace std {
 #include <__type_traits/add_pointer.h>
 #include <__type_traits/add_volatile.h>
 #include <__type_traits/common_type.h>
+#include <__type_traits/conjunction.h>
 #include <__type_traits/dependent_type.h>
 #include <__type_traits/is_array.h>
 #include <__type_traits/is_default_constructible.h>
diff --git a/libcxx/include/vector b/libcxx/include/vector
index 89cbdf0b..90507e6 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -2990,6 +2990,7 @@ _LIBCPP_POP_MACROS
 #  include <concepts>
 #  include <cstdlib>
 #  include <locale>
+#  include <tuple>
 #  include <type_traits>
 #  include <typeinfo>
 #  include <utility>
diff --git a/libcxx/modules/CMakeLists.txt.in b/libcxx/modules/CMakeLists.txt.in
index e332d70..c35f6fe 100644
--- a/libcxx/modules/CMakeLists.txt.in
+++ b/libcxx/modules/CMakeLists.txt.in
@@ -50,10 +50,15 @@ endif()
 target_compile_options(std
   PUBLIC
     -nostdinc++
+    @LIBCXX_COMPILE_FLAGS@
+)
+target_compile_options(std
+  PRIVATE
     -Wno-reserved-module-identifier
     -Wno-reserved-user-defined-literal
-    @LIBCXX_COMPILE_FLAGS@
 )
+target_link_options(std PUBLIC -nostdlib++ -Wl,-rpath,@LIBCXX_LIBRARY_DIR@ -L@LIBCXX_LIBRARY_DIR@)
+target_link_libraries(std c++)
 set_target_properties(std
   PROPERTIES
     OUTPUT_NAME   "c++std"
@@ -67,7 +72,7 @@ target_sources(std.compat
     std.compat.cppm
 )
 
-target_include_directories(std.compat SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@)
+target_include_directories(std.compat SYSTEM PUBLIC @LIBCXX_CONFIGURED_INCLUDE_DIRS@)
 
 if (NOT @LIBCXX_ENABLE_EXCEPTIONS@)
   target_compile_options(std.compat PUBLIC -fno-exceptions)
@@ -76,13 +81,16 @@ endif()
 target_compile_options(std.compat
   PUBLIC
     -nostdinc++
+    @LIBCXX_COMPILE_FLAGS@
+)
+target_compile_options(std.compat
+ PRIVATE
     -Wno-reserved-module-identifier
     -Wno-reserved-user-defined-literal
-	-fmodule-file=std=${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/std.dir/std.pcm
-    @LIBCXX_COMPILE_FLAGS@
 )
 set_target_properties(std.compat
   PROPERTIES
     OUTPUT_NAME   "c++std.compat"
 )
 add_dependencies(std.compat std)
+target_link_libraries(std.compat PUBLIC std c++)
diff --git a/libcxx/test/libcxx/diagnostics/limits.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/limits.nodiscard_extensions.compile.pass.cpp
new file mode 100644
index 0000000..a93f55b
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/limits.nodiscard_extensions.compile.pass.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_NODISCARD_EXT
+
+// Check that <limits> functions aren't marked [[nodiscard]] when
+// _LIBCPP_DISABLE_NODISCARD_EXT is defined
+
+#include <limits>
+
+#include "test_macros.h"
+
+void func() {
+  // arithmetic
+  std::numeric_limits<int>::min();
+  std::numeric_limits<int>::max();
+  std::numeric_limits<int>::lowest();
+  std::numeric_limits<int>::epsilon();
+  std::numeric_limits<int>::round_error();
+  std::numeric_limits<int>::infinity();
+  std::numeric_limits<int>::quiet_NaN();
+  std::numeric_limits<int>::signaling_NaN();
+  std::numeric_limits<int>::denorm_min();
+  // bool
+  std::numeric_limits<bool>::min();
+  std::numeric_limits<bool>::max();
+  std::numeric_limits<bool>::lowest();
+  std::numeric_limits<bool>::epsilon();
+  std::numeric_limits<bool>::round_error();
+  std::numeric_limits<bool>::infinity();
+  std::numeric_limits<bool>::quiet_NaN();
+  std::numeric_limits<bool>::signaling_NaN();
+  std::numeric_limits<bool>::denorm_min();
+  // float
+  std::numeric_limits<float>::min();
+  std::numeric_limits<float>::max();
+  std::numeric_limits<float>::lowest();
+  std::numeric_limits<float>::epsilon();
+  std::numeric_limits<float>::round_error();
+  std::numeric_limits<float>::infinity();
+  std::numeric_limits<float>::quiet_NaN();
+  std::numeric_limits<float>::signaling_NaN();
+  std::numeric_limits<float>::denorm_min();
+  // double
+  std::numeric_limits<double>::min();
+  std::numeric_limits<double>::max();
+  std::numeric_limits<double>::lowest();
+  std::numeric_limits<double>::epsilon();
+  std::numeric_limits<double>::round_error();
+  std::numeric_limits<double>::infinity();
+  std::numeric_limits<double>::quiet_NaN();
+  std::numeric_limits<double>::signaling_NaN();
+  std::numeric_limits<double>::denorm_min();
+  // long double
+  std::numeric_limits<long double>::min();
+  std::numeric_limits<long double>::max();
+  std::numeric_limits<long double>::lowest();
+  std::numeric_limits<long double>::epsilon();
+  std::numeric_limits<long double>::round_error();
+  std::numeric_limits<long double>::infinity();
+  std::numeric_limits<long double>::quiet_NaN();
+  std::numeric_limits<long double>::signaling_NaN();
+  std::numeric_limits<long double>::denorm_min();
+}
diff --git a/libcxx/test/libcxx/diagnostics/limits.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/limits.nodiscard_extensions.verify.cpp
new file mode 100644
index 0000000..7a81b84
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/limits.nodiscard_extensions.verify.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// check that <limits> functions are marked [[nodiscard]]
+
+#include <limits>
+
+#include "test_macros.h"
+
+void func() {
+  // clang-format off
+  // arithmetic
+  std::numeric_limits<int>::min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::max(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::lowest(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::epsilon(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::round_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::infinity(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::quiet_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::signaling_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<int>::denorm_min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  // bool
+  std::numeric_limits<bool>::min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::max(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::lowest(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::epsilon(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::round_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::infinity(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::quiet_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::signaling_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<bool>::denorm_min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  // float
+  std::numeric_limits<float>::min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::max(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::lowest(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::epsilon(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::round_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::infinity(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::quiet_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::signaling_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<float>::denorm_min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  // double
+  std::numeric_limits<double>::min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::max(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::lowest(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::epsilon(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::round_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::infinity(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::quiet_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::signaling_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<double>::denorm_min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  // long double
+  std::numeric_limits<long double>::min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::max(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::lowest(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::epsilon(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::round_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::infinity(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::quiet_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::signaling_NaN(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::numeric_limits<long double>::denorm_min(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+  // clang-format on
+}
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index daa3e17..ccba100 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -85,7 +85,6 @@ chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
-chrono tuple
 chrono vector
 chrono version
 cinttypes cstdint
@@ -132,7 +131,6 @@ coroutine compare
 coroutine cstddef
 coroutine cstdint
 coroutine cstring
-coroutine limits
 coroutine version
 cstddef version
 ctgmath ccomplex
@@ -167,7 +165,6 @@ experimental/iterator iterator
 experimental/memory cstddef
 experimental/memory cstdint
 experimental/memory cstring
-experimental/memory limits
 experimental/propagate_const cstddef
 experimental/simd cstddef
 experimental/simd cstdint
@@ -397,7 +394,6 @@ optional cstddef
 optional cstdint
 optional cstring
 optional initializer_list
-optional limits
 optional new
 optional version
 ostream bitset
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index daa3e17..ccba100 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -85,7 +85,6 @@ chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
-chrono tuple
 chrono vector
 chrono version
 cinttypes cstdint
@@ -132,7 +131,6 @@ coroutine compare
 coroutine cstddef
 coroutine cstdint
 coroutine cstring
-coroutine limits
 coroutine version
 cstddef version
 ctgmath ccomplex
@@ -167,7 +165,6 @@ experimental/iterator iterator
 experimental/memory cstddef
 experimental/memory cstdint
 experimental/memory cstring
-experimental/memory limits
 experimental/propagate_const cstddef
 experimental/simd cstddef
 experimental/simd cstdint
@@ -397,7 +394,6 @@ optional cstddef
 optional cstdint
 optional cstring
 optional initializer_list
-optional limits
 optional new
 optional version
 ostream bitset
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp
new file mode 100644
index 0000000..8bcce28
--- /dev/null
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp
@@ -0,0 +1,120 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <fstream>
+
+// basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n) override;
+
+#include <fstream>
+#include <cstddef>
+#include <cassert>
+
+#include "test_macros.h"
+
+template <class CharT>
+static std::size_t file_size(const char* filename) {
+  FILE* f = std::fopen(filename, "rb");
+  std::fseek(f, 0, SEEK_END);
+  long result = std::ftell(f);
+  std::fclose(f);
+  return result;
+}
+
+// Helper class to expose some protected std::basic_filebuf<CharT> members.
+template <class CharT>
+struct filebuf : public std::basic_filebuf<CharT> {
+  CharT* base() { return this->pbase(); }
+  CharT* ptr() { return this->pptr(); }
+};
+
+template <class CharT>
+static void buffered_request() {
+  filebuf<CharT> buffer;
+
+  CharT b[10] = {0};
+  assert(buffer.pubsetbuf(b, 10) == &buffer);
+
+  buffer.open("test.dat", std::ios_base::out);
+  buffer.sputc(CharT('a'));
+  assert(b[0] == 'a');
+
+  buffer.close();
+  assert(file_size<CharT>("test.dat") == 1);
+}
+
+template <class CharT>
+static void unbuffered_request_before_open() {
+  filebuf<CharT> buffer;
+
+  assert(buffer.pubsetbuf(nullptr, 0) == &buffer);
+  assert(buffer.base() == nullptr);
+  assert(buffer.ptr() == nullptr);
+
+  buffer.open("test.dat", std::ios_base::out);
+  assert(buffer.base() == nullptr);
+  assert(buffer.ptr() == nullptr);
+
+  buffer.sputc(CharT('a'));
+  assert(buffer.base() == nullptr);
+  assert(buffer.ptr() == nullptr);
+
+  assert(file_size<CharT>("test.dat") == 1);
+}
+
+template <class CharT>
+static void unbuffered_request_after_open() {
+  filebuf<CharT> buffer;
+
+  buffer.open("test.dat", std::ios_base::out);
+
+  assert(buffer.pubsetbuf(nullptr, 0) == &buffer);
+  assert(buffer.base() == nullptr);
+  assert(buffer.ptr() == nullptr);
+
+  buffer.sputc(CharT('a'));
+  assert(buffer.base() == nullptr);
+  assert(buffer.ptr() == nullptr);
+
+  assert(file_size<CharT>("test.dat") == 1);
+}
+
+template <class CharT>
+static void unbuffered_request_after_open_ate() {
+  filebuf<CharT> buffer;
+
+  buffer.open("test.dat", std::ios_base::out | std::ios_base::ate);
+
+  assert(buffer.pubsetbuf(nullptr, 0) == &buffer);
+
+  buffer.sputc(CharT('a'));
+  assert(file_size<CharT>("test.dat") <= 1);
+  // on libc++ buffering is used by default.
+  LIBCPP_ASSERT(file_size<CharT>("test.dat") == 0);
+
+  buffer.close();
+  assert(file_size<CharT>("test.dat") == 1);
+}
+
+template <class CharT>
+static void test() {
+  buffered_request<CharT>();
+
+  unbuffered_request_before_open<CharT>();
+  unbuffered_request_after_open<CharT>();
+  unbuffered_request_after_open_ate<CharT>();
+}
+
+int main(int, char**) {
+  test<char>();
+
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
+#endif
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp
index 335e8d2..b6204c6 100644
--- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // const tzdb& get_tzdb();
@@ -30,8 +27,6 @@ int main(int, const char**) {
 
   assert(!db.version.empty());
 
-  LIBCPP_ASSERT(!db.__rules.empty());
-
   assert(!db.zones.empty());
   assert(std::ranges::is_sorted(db.zones));
   assert(std::ranges::adjacent_find(db.zones) == db.zones.end()); // is unique?
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb_list.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb_list.pass.cpp
index 34af9b5..a5579a3 100644
--- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb_list.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb_list.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // const tzdb& get_tzdb_list();
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/front.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/front.pass.cpp
index ac5fee8..12c5310 100644
--- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/front.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/front.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 //
 // class tzdb_list;
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/iterators.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/iterators.pass.cpp
index 8bd9b32..b00b8b4 100644
--- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/iterators.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.list/iterators.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 //
 // class tzdb_list;
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/reload_tzdb.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/reload_tzdb.pass.cpp
index bbf9002..af38772 100644
--- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/reload_tzdb.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/reload_tzdb.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // Note there is no Standard way to change the remote database used.
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/remote_version.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/remote_version.pass.cpp
index 861075c..36b68ce 100644
--- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/remote_version.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.remote/remote_version.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // const string remote_version();
diff --git a/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/name.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/name.pass.cpp
index 95d86d5..c2412ba 100644
--- a/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/name.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/name.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // class time_zone_link;
diff --git a/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/target.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/target.pass.cpp
index 305fbd2..2f8b5b9 100644
--- a/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/target.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.members/target.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // class time_zone_link;
diff --git a/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.nonmembers/comparison.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.nonmembers/comparison.pass.cpp
index e375d7e..944818c 100644
--- a/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.nonmembers/comparison.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.link/time.zone.link.nonmembers/comparison.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // bool operator==(const time_zone_link& x, const time_zone_link& y) noexcept;
diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/name.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/name.pass.cpp
index 2bbe714..d1ff2fe 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/name.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/name.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // class time_zone;
diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.nonmembers/comparison.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.nonmembers/comparison.pass.cpp
index 9eae91e..7c68070 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.nonmembers/comparison.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.nonmembers/comparison.pass.cpp
@@ -12,9 +12,6 @@
 // XFAIL: libcpp-has-no-incomplete-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB (#81654) Enable tests
-// UNSUPPORTED: c++20, c++23, c++26
-
 // <chrono>
 
 // bool operator==(const time_zone& x, const time_zone& y) noexcept;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.verify.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.verify.cpp
index 7570230..1498904 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.verify.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.verify.cpp
@@ -17,8 +17,8 @@
 #include <tuple>
 
 void f() {
-    (void)std::tuple_size_v<std::tuple<> &>; // expected-note {{requested here}}
-    (void)std::tuple_size_v<int>; // expected-note {{requested here}}
-    (void)std::tuple_size_v<std::tuple<>*>; // expected-note {{requested here}}
-    // expected-error@tuple:* 3 {{implicit instantiation of undefined template}}
+  (void)std::tuple_size_v<std::tuple<>&>; // expected-note {{requested here}}
+  (void)std::tuple_size_v<int>;           // expected-note {{requested here}}
+  (void)std::tuple_size_v<std::tuple<>*>; // expected-note {{requested here}}
+                                          // expected-error@*:* 3 {{implicit instantiation of undefined template}}
 }
diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml
index 5cefcb2..af9a484 100644
--- a/libcxx/utils/ci/docker-compose.yml
+++ b/libcxx/utils/ci/docker-compose.yml
@@ -2,7 +2,7 @@ version: '3.7'
 
 x-versions: &compiler_versions
   GCC_LATEST_VERSION: 13
-  LLVM_HEAD_VERSION: 18
+  LLVM_HEAD_VERSION: 19
 
 services:
   buildkite-builder:
diff --git a/libcxx/utils/ci/run-buildbot-container b/libcxx/utils/ci/run-buildbot-container
index 7c00b88..74e7dab 100755
--- a/libcxx/utils/ci/run-buildbot-container
+++ b/libcxx/utils/ci/run-buildbot-container
@@ -26,6 +26,6 @@ if [[ ! -d "${MONOREPO_ROOT}/libcxx/utils/ci" ]]; then
     echo "Was unable to find the root of the LLVM monorepo; are you running from within the monorepo?"
     exit 1
 fi
-docker pull ghcr.io/libcxx/libcxx-builder
-docker run -it --volume "${MONOREPO_ROOT}:/llvm" --workdir "/llvm" --cap-add=SYS_PTRACE ghcr.io/libcxx/libcxx-builder \
+docker pull ghcr.io/libcxx/actions-builder
+docker run -it --volume "${MONOREPO_ROOT}:/llvm" --workdir "/llvm" --cap-add=SYS_PTRACE ghcr.io/libcxx/actions-builder \
     bash -c 'git config --global --add safe.directory /llvm ; exec bash'
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 9ae01eb..6c46249 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -273,6 +273,7 @@ struct Config {
   bool printGcSections;
   bool printIcfSections;
   bool printMemoryUsage;
+  bool rejectMismatch;
   bool relax;
   bool relaxGP;
   bool relocatable;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 2439d14..e7160a4 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1348,6 +1348,7 @@ static void readConfigs(opt::InputArgList &args) {
   config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats);
   config->printSymbolOrder =
       args.getLastArgValue(OPT_print_symbol_order);
+  config->rejectMismatch = !args.hasArg(OPT_no_warn_mismatch);
   config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
   config->relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false);
   config->rpath = getRpath(args);
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 00aebb4..e6bf39b 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -741,6 +741,15 @@ template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
   sections.resize(numELFShdrs);
 }
 
+static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) {
+  if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC))
+    return true;
+  if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING))
+    return true;
+  // Allow all processor-specific types. This is different from GNU ld.
+  return SHT_LOPROC <= t && t <= SHT_HIPROC;
+}
+
 template <class ELFT>
 void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
                                        const llvm::object::ELFFile<ELFT> &obj) {
@@ -752,14 +761,15 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
     if (this->sections[i] == &InputSection::discarded)
       continue;
     const Elf_Shdr &sec = objSections[i];
+    const uint32_t type = sec.sh_type;
 
     // SHF_EXCLUDE'ed sections are discarded by the linker. However,
     // if -r is given, we'll let the final link discard such sections.
     // This is compatible with GNU.
     if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) {
-      if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE)
+      if (type == SHT_LLVM_CALL_GRAPH_PROFILE)
         cgProfileSectionIndex = i;
-      if (sec.sh_type == SHT_LLVM_ADDRSIG) {
+      if (type == SHT_LLVM_ADDRSIG) {
         // We ignore the address-significance table if we know that the object
         // file was created by objcopy or ld -r. This is because these tools
         // will reorder the symbols in the symbol table, invalidating the data
@@ -778,7 +788,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
       continue;
     }
 
-    switch (sec.sh_type) {
+    switch (type) {
     case SHT_GROUP: {
       if (!config->relocatable)
         sections[i] = &InputSection::discarded;
@@ -801,12 +811,25 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
     case SHT_RELA:
     case SHT_NULL:
       break;
-    case SHT_LLVM_SYMPART:
-      ctx.hasSympart.store(true, std::memory_order_relaxed);
-      [[fallthrough]];
+    case SHT_PROGBITS:
+    case SHT_NOTE:
+    case SHT_NOBITS:
+    case SHT_INIT_ARRAY:
+    case SHT_FINI_ARRAY:
+    case SHT_PREINIT_ARRAY:
+      this->sections[i] =
+          createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
+      break;
     default:
       this->sections[i] =
           createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
+      if (type == SHT_LLVM_SYMPART)
+        ctx.hasSympart.store(true, std::memory_order_relaxed);
+      else if (config->rejectMismatch &&
+               !isKnownSpecificSectionType(type, sec.sh_flags))
+        errorOrWarn(toString(this->sections[i]) + ": unknown section type 0x" +
+                    Twine::utohexstr(type));
+      break;
     }
   }
 
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 3819b86..c5e95d0 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -312,6 +312,9 @@ def no_dynamic_linker: F<"no-dynamic-linker">,
 def noinhibit_exec: F<"noinhibit-exec">,
   HelpText<"Retain the executable output file whenever it is still usable">;
 
+def no_warn_mismatch: F<"no-warn-mismatch">,
+  HelpText<"Suppress errors for certain unknown seciton types">;
+
 def no_nmagic: F<"no-nmagic">, MetaVarName<"<magic>">,
   HelpText<"Page align sections (default)">;
 
@@ -753,7 +756,6 @@ def: FF<"no-add-needed">;
 def: F<"no-copy-dt-needed-entries">;
 def: F<"no-ctors-in-init-array">;
 def: F<"no-keep-memory">;
-def: F<"no-warn-mismatch">;
 def: Separate<["--", "-"], "rpath-link">;
 def: J<"rpath-link=">;
 def: F<"secure-plt">;
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index a9292b3..d8782af 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -261,6 +261,9 @@ static void demoteDefined(Defined &sym, DenseMap<SectionBase *, size_t> &map) {
   Undefined(sym.file, sym.getName(), binding, sym.stOther, sym.type,
             /*discardedSecIdx=*/map.lookup(sym.section))
       .overwrite(sym);
+  // Eliminate from the symbol table, otherwise we would leave an undefined
+  // symbol if the symbol is unreferenced in the absence of GC.
+  sym.isUsedInRegularObj = false;
 }
 
 // If all references to a DSO happen to be weak, the DSO is not added to
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index e759776..65e50e3 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -378,6 +378,8 @@ Do not put read-only non-executable sections in their own segment.
 Do not report version scripts that refer to undefined symbols.
 .It Fl -no-undefined
 Report unresolved symbols even if the linker is creating a shared library.
+.It Fl -no-warn-mismatch
+Do not reject unknown section types.
 .It Fl -no-warn-symbol-ordering
 Do not warn about problems with the symbol ordering file or call graph profile.
 .It Fl -no-warnings , Fl w
diff --git a/lld/test/ELF/incompatible-section-types2.s b/lld/test/ELF/incompatible-section-types2.s
index 3e281ce..919515f 100644
--- a/lld/test/ELF/incompatible-section-types2.s
+++ b/lld/test/ELF/incompatible-section-types2.s
@@ -6,5 +6,5 @@
 // CHECK-NEXT: >>> <internal>:(.shstrtab): SHT_STRTAB
 // CHECK-NEXT: >>> output section .shstrtab: Unknown
 
-.section .shstrtab,"",@12345
+.section .shstrtab,"",@0x60000000
 .short 20
diff --git a/lld/test/ELF/linkerscript/custom-section-type.s b/lld/test/ELF/linkerscript/custom-section-type.s
index 68454f4d..8ca0a4d 100644
--- a/lld/test/ELF/linkerscript/custom-section-type.s
+++ b/lld/test/ELF/linkerscript/custom-section-type.s
@@ -76,7 +76,7 @@ SECTIONS {
 .section progbits,"a",@note
 .byte 0
 
-.section expr,"a",@12345
+.section expr,"a",@0x60000000
 .byte 0
 
 #--- unknown1.lds
diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s
index 24f3b2b..0bbebac 100644
--- a/lld/test/ELF/linkerscript/discard-section.s
+++ b/lld/test/ELF/linkerscript/discard-section.s
@@ -9,6 +9,9 @@
 # RUN: ld.lld -r -T a.lds a.o b.o -o a.ro 2>&1 | FileCheck %s --check-prefix=WARNING --implicit-check-not=warning:
 # RUN: llvm-readelf -r -s a.ro | FileCheck %s --check-prefix=RELOC
 
+# RUN: ld.lld -r --gc-sections -T a.lds a.o b.o -o a.gc.ro --no-fatal-warnings
+# RUN: llvm-readelf -r -s a.gc.ro | FileCheck %s --check-prefix=RELOC-GC
+
 # LOCAL:      error: relocation refers to a discarded section: .aaa
 # LOCAL-NEXT: >>> defined in a.o
 # LOCAL-NEXT: >>> referenced by a.o:(.bbb+0x0)
@@ -32,16 +35,18 @@
 # WARNING:      warning: relocation refers to a discarded section: .aaa
 # WARNING-NEXT: >>> referenced by a.o:(.rela.bbb+0x0)
 
+## GNU ld reports "defined in discarded secion" errors even in -r mode.
+## We set the symbol index to 0.
 # RELOC:      Relocation section '.rela.bbb' at offset {{.*}} contains 1 entries:
 # RELOC-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
 # RELOC-NEXT: 0000000000000000  0000000000000000 R_X86_64_NONE                             0
 # RELOC-EMPTY:
 # RELOC-NEXT: Relocation section '.rela.data' at offset {{.*}} contains 4 entries:
 # RELOC-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
-# RELOC-NEXT: 0000000000000000  0000000500000001 R_X86_64_64            0000000000000000 global + 0
-# RELOC-NEXT: 0000000000000008  0000000700000001 R_X86_64_64            0000000000000000 weak + 0
-# RELOC-NEXT: 0000000000000010  0000000600000001 R_X86_64_64            0000000000000000 weakref1 + 0
-# RELOC-NEXT: 0000000000000018  0000000800000001 R_X86_64_64            0000000000000000 weakref2 + 0
+# RELOC-NEXT: 0000000000000000  0000000000000001 R_X86_64_64                             0
+# RELOC-NEXT: 0000000000000008  0000000000000001 R_X86_64_64                             0
+# RELOC-NEXT: 0000000000000010  0000000000000001 R_X86_64_64                             0
+# RELOC-NEXT: 0000000000000018  0000000000000001 R_X86_64_64                             0
 
 # RELOC:      Num:    Value          Size Type    Bind   Vis      Ndx Name
 # RELOC-NEXT:   0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
@@ -49,23 +54,25 @@
 # RELOC-NEXT:   2: 0000000000000000     0 SECTION LOCAL  DEFAULT    2 .bbb
 # RELOC-NEXT:   3: 0000000000000000     0 SECTION LOCAL  DEFAULT    4 .data
 # RELOC-NEXT:   4: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT    1 _start
-# RELOC-NEXT:   5: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT   UND global
-# RELOC-NEXT:   6: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT   UND weakref1
-# RELOC-NEXT:   7: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT   UND weak
-# RELOC-NEXT:   8: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT   UND weakref2
 # RELOC-EMPTY:
 
+# RELOC-GC:   There are no relocations in this file.
+
 #--- a.s
 .globl _start
 _start:
 
 .section .aaa,"a"
-.globl global, weakref1
+.globl global, weakref1, unused
 .weak weak, weakref2
 global:
 weak:
 weakref1:
 weakref2:
+## Eliminate `unused` just like GC discarded definitions.
+## Linux kernel's CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y configuration expects
+## that the unreferenced `unused` is not emitted to .symtab.
+unused:
   .quad 0
 
 .section .bbb,"aw"
diff --git a/lld/test/ELF/unknown-section.test b/lld/test/ELF/unknown-section.test
new file mode 100644
index 0000000..f6ecca2
--- /dev/null
+++ b/lld/test/ELF/unknown-section.test
@@ -0,0 +1,48 @@
+# RUN: rm -rf %t && mkdir %t && cd %t
+# RUN: yaml2obj %s -o a.o
+# RUN: not ld.lld a.o -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=error:
+
+# CHECK:      error: a.o:(relr): unknown section type 0x13
+# CHECK-NEXT: error: a.o:(regular): unknown section type 0x15
+# CHECK-NEXT: error: a.o:(loos_nonconforming): unknown section type 0x60000000
+# CHECK-NEXT: error: a.o:(hios_nonconforming): unknown section type 0x6fffffff
+# CHECK-NEXT: error: a.o:(louser_alloc): unknown section type 0x80000000
+# CHECK-NEXT: error: a.o:(hiuser_alloc): unknown section type 0xffffffff
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+Sections:
+  - Name:  relr
+    Type:  19
+  - Name:  regular
+    Type:  21
+  - Name:  loos
+    Type:  0x60000000
+  - Name:  hios
+    Type:  0x6fffffff
+  - Name:  loos_nonconforming
+    Type:  0x60000000
+    Flags: [ SHF_OS_NONCONFORMING ]
+  - Name:  hios_nonconforming
+    Type:  0x6fffffff
+    Flags: [ SHF_OS_NONCONFORMING ]
+
+  - Name:  loproc
+    Type:  0x70000000
+  - Name:  hiproc
+    Type:  0x7fffffff
+
+  - Name:  louser
+    Type:  0x80000000
+  - Name:  hiuser
+    Type:  0xffffffff
+  - Name:  louser_alloc
+    Type:  0x80000000
+    Flags: [ SHF_ALLOC ]
+  - Name:  hiuser_alloc
+    Type:  0xffffffff
+    Flags: [ SHF_ALLOC ]
diff --git a/lldb/cmake/caches/Apple-lldb-Linux.cmake b/lldb/cmake/caches/Apple-lldb-Linux.cmake
index 13d3839..b2d3cf5 100644
--- a/lldb/cmake/caches/Apple-lldb-Linux.cmake
+++ b/lldb/cmake/caches/Apple-lldb-Linux.cmake
@@ -5,4 +5,5 @@ set(LLVM_DISTRIBUTION_COMPONENTS
   liblldb
   lldb-argdumper
   lldb-server
+  lldb-python-scripts
   CACHE STRING "")
diff --git a/lldb/include/lldb/Host/Alarm.h b/lldb/include/lldb/Host/Alarm.h
new file mode 100644
index 0000000..23b1ff1
--- /dev/null
+++ b/lldb/include/lldb/Host/Alarm.h
@@ -0,0 +1,115 @@
+//===-- Alarm.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_HOST_ALARM_H
+#define LLDB_HOST_ALARM_H
+
+#include "lldb/Host/HostThread.h"
+#include "lldb/lldb-types.h"
+#include "llvm/Support/Chrono.h"
+
+#include <condition_variable>
+#include <mutex>
+
+namespace lldb_private {
+
+/// \class Alarm abstraction that enables scheduling a callback function after a
+/// specified timeout. Creating an alarm for a callback returns a Handle that
+/// can be used to restart or cancel the alarm.
+class Alarm {
+public:
+  using Handle = uint64_t;
+  using Callback = std::function<void()>;
+  using TimePoint = llvm::sys::TimePoint<>;
+  using Duration = std::chrono::milliseconds;
+
+  Alarm(Duration timeout, bool run_callback_on_exit = false);
+  ~Alarm();
+
+  /// Create an alarm for the given callback. The alarm will expire and the
+  /// callback will be called after the timeout.
+  ///
+  /// \returns
+  ///   Handle which can be used to restart or cancel the alarm.
+  Handle Create(Callback callback);
+
+  /// Restart the alarm for the given Handle. The alarm will expire and the
+  /// callback will be called after the timeout.
+  ///
+  /// \returns
+  ///   True if the alarm was successfully restarted. False if there is no alarm
+  ///   for the given Handle or the alarm already expired.
+  bool Restart(Handle handle);
+
+  /// Cancel the alarm for the given Handle. The alarm and its handle will be
+  /// removed.
+  ///
+  /// \returns
+  ///   True if the alarm was successfully canceled and the Handle removed.
+  ///   False if there is no alarm for the given Handle or the alarm already
+  ///   expired.
+  bool Cancel(Handle handle);
+
+  static constexpr Handle INVALID_HANDLE = 0;
+
+private:
+  /// Helper functions to start, stop and check the status of the alarm thread.
+  /// @{
+  void StartAlarmThread();
+  void StopAlarmThread();
+  bool AlarmThreadRunning();
+  /// @}
+
+  /// Return an unique, monotonically increasing handle.
+  static Handle GetNextUniqueHandle();
+
+  /// Helper to compute the next time the alarm thread needs to wake up.
+  TimePoint GetNextExpiration() const;
+
+  /// Alarm entry.
+  struct Entry {
+    Handle handle;
+    Callback callback;
+    TimePoint expiration;
+
+    Entry(Callback callback, TimePoint expiration);
+    bool operator==(const Entry &rhs) { return handle == rhs.handle; }
+  };
+
+  /// List of alarm entries.
+  std::vector<Entry> m_entries;
+
+  /// Timeout between when an alarm is created and when it fires.
+  Duration m_timeout;
+
+  /// The alarm thread.
+  /// @{
+  HostThread m_alarm_thread;
+  lldb::thread_result_t AlarmThread();
+  /// @}
+
+  /// Synchronize access between the alarm thread and the main thread.
+  std::mutex m_alarm_mutex;
+
+  /// Condition variable used to wake up the alarm thread.
+  std::condition_variable m_alarm_cv;
+
+  /// Flag to signal the alarm thread that something changed and we need to
+  /// recompute the next alarm.
+  bool m_recompute_next_alarm = false;
+
+  /// Flag to signal the alarm thread to exit.
+  bool m_exit = false;
+
+  /// Flag to signal we should run all callbacks on exit.
+  bool m_run_callbacks_on_exit = false;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_HOST_ALARM_H
diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h
index 0cbd8a3..67714e6 100644
--- a/lldb/include/lldb/Target/Language.h
+++ b/lldb/include/lldb/Target/Language.h
@@ -26,6 +26,15 @@
 
 namespace lldb_private {
 
+class LanguageProperties : public Properties {
+public:
+  LanguageProperties();
+
+  static llvm::StringRef GetSettingName();
+
+  bool GetEnableFilterForLineBreakpoints() const;
+};
+
 class Language : public PluginInterface {
 public:
   class TypeScavenger {
@@ -324,6 +333,8 @@ public:
   static LanguageSet GetLanguagesSupportingTypeSystemsForExpressions();
   static LanguageSet GetLanguagesSupportingREPLs();
 
+  static LanguageProperties &GetGlobalLanguageProperties();
+
   // Given a mangled function name, calculates some alternative manglings since
   // the compiler mangling may not line up with the symbol we are expecting.
   virtual std::vector<ConstString>
@@ -339,6 +350,15 @@ public:
 
   virtual llvm::StringRef GetInstanceVariableName() { return {}; }
 
+  /// Returns true if this SymbolContext should be ignored when setting
+  /// breakpoints by line (number or regex). Helpful for languages that create
+  /// artificial functions without meaningful user code associated with them
+  /// (e.g. code that gets expanded in late compilation stages, like by
+  /// CoroSplitter).
+  virtual bool IgnoreForLineBreakpoints(const SymbolContext &) const {
+    return false;
+  }
+
 protected:
   // Classes that inherit from Language can see and modify these
 
diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp
index bc63487..1861a0f 100644
--- a/lldb/source/Breakpoint/BreakpointResolver.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolver.cpp
@@ -23,6 +23,7 @@
 #include "lldb/Symbol/CompileUnit.h"
 #include "lldb/Symbol/Function.h"
 #include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Target/Language.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/LLDBLog.h"
 #include "lldb/Utility/Log.h"
@@ -203,8 +204,15 @@ void BreakpointResolver::SetSCMatchesByLine(
     SearchFilter &filter, SymbolContextList &sc_list, bool skip_prologue,
     llvm::StringRef log_ident, uint32_t line, std::optional<uint16_t> column) {
   llvm::SmallVector<SymbolContext, 16> all_scs;
-  for (uint32_t i = 0; i < sc_list.GetSize(); ++i)
-    all_scs.push_back(sc_list[i]);
+
+  for (const auto &sc : sc_list) {
+    if (Language::GetGlobalLanguageProperties()
+            .GetEnableFilterForLineBreakpoints())
+      if (Language *lang = Language::FindPlugin(sc.GetLanguage());
+          lang && lang->IgnoreForLineBreakpoints(sc))
+        continue;
+    all_scs.push_back(sc);
+  }
 
   while (all_scs.size()) {
     uint32_t closest_line = UINT32_MAX;
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index b2346c2..ae6c6d5 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -3377,7 +3377,7 @@ protected:
       case 'r': {
         size_t ref_count = 0;
         char in_shared_cache = 'Y';
-        
+
         ModuleSP module_sp(module->shared_from_this());
         if (!ModuleList::ModuleIsInCache(module))
           in_shared_cache = 'N';
@@ -4508,11 +4508,8 @@ protected:
 
     ModuleSpec module_spec;
     module_spec.GetUUID() = frame_module_sp->GetUUID();
-
-    if (FileSystem::Instance().Exists(frame_module_sp->GetPlatformFileSpec())) {
-      module_spec.GetArchitecture() = frame_module_sp->GetArchitecture();
-      module_spec.GetFileSpec() = frame_module_sp->GetPlatformFileSpec();
-    }
+    module_spec.GetArchitecture() = frame_module_sp->GetArchitecture();
+    module_spec.GetFileSpec() = frame_module_sp->GetPlatformFileSpec();
 
     if (!DownloadObjectAndSymbolFile(module_spec, result, flush)) {
       result.AppendError("unable to find debug symbols for the current frame");
@@ -4557,12 +4554,8 @@ protected:
 
       ModuleSpec module_spec;
       module_spec.GetUUID() = frame_module_sp->GetUUID();
-
-      if (FileSystem::Instance().Exists(
-              frame_module_sp->GetPlatformFileSpec())) {
-        module_spec.GetArchitecture() = frame_module_sp->GetArchitecture();
-        module_spec.GetFileSpec() = frame_module_sp->GetPlatformFileSpec();
-      }
+      module_spec.GetFileSpec() = frame_module_sp->GetPlatformFileSpec();
+      module_spec.GetArchitecture() = frame_module_sp->GetArchitecture();
 
       bool current_frame_flush = false;
       if (DownloadObjectAndSymbolFile(module_spec, result, current_frame_flush))
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 90aabde..ebd1121 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -860,6 +860,9 @@ Debugger::Debugger(lldb::LogOutputCallback log_callback, void *baton)
   m_collection_sp->AppendProperty(
       "symbols", "Symbol lookup and cache settings.", true,
       ModuleList::GetGlobalModuleListProperties().GetValueProperties());
+  m_collection_sp->AppendProperty(
+      LanguageProperties::GetSettingName(), "Language settings.", true,
+      Language::GetGlobalLanguageProperties().GetValueProperties());
   if (m_command_interpreter_up) {
     m_collection_sp->AppendProperty(
         "interpreter",
diff --git a/lldb/source/Host/CMakeLists.txt b/lldb/source/Host/CMakeLists.txt
index fe6e539..c2e091e 100644
--- a/lldb/source/Host/CMakeLists.txt
+++ b/lldb/source/Host/CMakeLists.txt
@@ -13,6 +13,7 @@ macro(add_host_subdirectory group)
 endmacro()
 
 add_host_subdirectory(common
+  common/Alarm.cpp
   common/FileAction.cpp
   common/FileCache.cpp
   common/File.cpp
diff --git a/lldb/source/Host/common/Alarm.cpp b/lldb/source/Host/common/Alarm.cpp
new file mode 100644
index 0000000..80c5447
--- /dev/null
+++ b/lldb/source/Host/common/Alarm.cpp
@@ -0,0 +1,216 @@
+//===-- Alarm.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Alarm.h"
+#include "lldb/Host/ThreadLauncher.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+Alarm::Alarm(Duration timeout, bool run_callback_on_exit)
+    : m_timeout(timeout), m_run_callbacks_on_exit(run_callback_on_exit) {
+  StartAlarmThread();
+}
+
+Alarm::~Alarm() { StopAlarmThread(); }
+
+Alarm::Handle Alarm::Create(std::function<void()> callback) {
+  // Gracefully deal with the unlikely event that the alarm thread failed to
+  // launch.
+  if (!AlarmThreadRunning())
+    return INVALID_HANDLE;
+
+  // Compute the next expiration before we take the lock. This ensures that
+  // waiting on the lock doesn't eat into the timeout.
+  const TimePoint expiration = GetNextExpiration();
+
+  Handle handle = INVALID_HANDLE;
+
+  {
+    std::lock_guard alarm_guard(m_alarm_mutex);
+
+    // Create a new unique entry and remember its handle.
+    m_entries.emplace_back(callback, expiration);
+    handle = m_entries.back().handle;
+
+    // Tell the alarm thread we need to recompute the next alarm.
+    m_recompute_next_alarm = true;
+  }
+
+  m_alarm_cv.notify_one();
+  return handle;
+}
+
+bool Alarm::Restart(Handle handle) {
+  // Gracefully deal with the unlikely event that the alarm thread failed to
+  // launch.
+  if (!AlarmThreadRunning())
+    return false;
+
+  // Compute the next expiration before we take the lock. This ensures that
+  // waiting on the lock doesn't eat into the timeout.
+  const TimePoint expiration = GetNextExpiration();
+
+  {
+    std::lock_guard alarm_guard(m_alarm_mutex);
+
+    // Find the entry corresponding to the given handle.
+    const auto it =
+        std::find_if(m_entries.begin(), m_entries.end(),
+                     [handle](Entry &entry) { return entry.handle == handle; });
+    if (it == m_entries.end())
+      return false;
+
+    // Update the expiration.
+    it->expiration = expiration;
+
+    // Tell the alarm thread we need to recompute the next alarm.
+    m_recompute_next_alarm = true;
+  }
+
+  m_alarm_cv.notify_one();
+  return true;
+}
+
+bool Alarm::Cancel(Handle handle) {
+  // Gracefully deal with the unlikely event that the alarm thread failed to
+  // launch.
+  if (!AlarmThreadRunning())
+    return false;
+
+  {
+    std::lock_guard alarm_guard(m_alarm_mutex);
+
+    const auto it =
+        std::find_if(m_entries.begin(), m_entries.end(),
+                     [handle](Entry &entry) { return entry.handle == handle; });
+
+    if (it == m_entries.end())
+      return false;
+
+    m_entries.erase(it);
+  }
+
+  // No need to notify the alarm thread. This only affects the alarm thread if
+  // we removed the entry that corresponds to the next alarm. If that's the
+  // case, the thread will wake up as scheduled, find no expired events, and
+  // recompute the next alarm time.
+  return true;
+}
+
+Alarm::Entry::Entry(Alarm::Callback callback, Alarm::TimePoint expiration)
+    : handle(Alarm::GetNextUniqueHandle()), callback(std::move(callback)),
+      expiration(std::move(expiration)) {}
+
+void Alarm::StartAlarmThread() {
+  if (!m_alarm_thread.IsJoinable()) {
+    llvm::Expected<HostThread> alarm_thread = ThreadLauncher::LaunchThread(
+        "lldb.debugger.alarm-thread", [this] { return AlarmThread(); },
+        8 * 1024 * 1024); // Use larger 8MB stack for this thread
+    if (alarm_thread) {
+      m_alarm_thread = *alarm_thread;
+    } else {
+      LLDB_LOG_ERROR(GetLog(LLDBLog::Host), alarm_thread.takeError(),
+                     "failed to launch host thread: {0}");
+    }
+  }
+}
+
+void Alarm::StopAlarmThread() {
+  if (m_alarm_thread.IsJoinable()) {
+    {
+      std::lock_guard alarm_guard(m_alarm_mutex);
+      m_exit = true;
+    }
+    m_alarm_cv.notify_one();
+    m_alarm_thread.Join(nullptr);
+  }
+}
+
+bool Alarm::AlarmThreadRunning() { return m_alarm_thread.IsJoinable(); }
+
+lldb::thread_result_t Alarm::AlarmThread() {
+  bool exit = false;
+  std::optional<TimePoint> next_alarm;
+
+  const auto predicate = [this] { return m_exit || m_recompute_next_alarm; };
+
+  while (!exit) {
+    // Synchronization between the main thread and the alarm thread using a
+    // mutex and condition variable. There are 2 reasons the thread can wake up:
+    //
+    // 1. The timeout for the next alarm expired.
+    //
+    // 2. The condition variable is notified that one of our shared variables
+    //    (see predicate) was modified. Either the thread is asked to shut down
+    //    or a new alarm came in and we need to recompute the next timeout.
+    //
+    // Below we only deal with the timeout expiring and fall through for dealing
+    // with the rest.
+    std::unique_lock alarm_lock(m_alarm_mutex);
+    if (next_alarm) {
+      if (!m_alarm_cv.wait_until(alarm_lock, *next_alarm, predicate)) {
+        // The timeout for the next alarm expired.
+
+        // Clear the next timeout to signal that we need to recompute the next
+        // timeout.
+        next_alarm.reset();
+
+        // Iterate over all the callbacks. Call the ones that have expired
+        // and remove them from the list.
+        const TimePoint now = std::chrono::system_clock::now();
+        auto it = m_entries.begin();
+        while (it != m_entries.end()) {
+          if (it->expiration <= now) {
+            it->callback();
+            it = m_entries.erase(it);
+          } else {
+            it++;
+          }
+        }
+      }
+    } else {
+      m_alarm_cv.wait(alarm_lock, predicate);
+    }
+
+    // Fall through after waiting on the condition variable. At this point
+    // either the predicate is true or we woke up because an alarm expired.
+
+    // The alarm thread is shutting down.
+    if (m_exit) {
+      exit = true;
+      if (m_run_callbacks_on_exit) {
+        for (Entry &entry : m_entries)
+          entry.callback();
+      }
+      continue;
+    }
+
+    // A new alarm was added or an alarm expired. Either way we need to
+    // recompute when this thread should wake up for the next alarm.
+    if (m_recompute_next_alarm || !next_alarm) {
+      for (Entry &entry : m_entries) {
+        if (!next_alarm || entry.expiration < *next_alarm)
+          next_alarm = entry.expiration;
+      }
+      m_recompute_next_alarm = false;
+    }
+  }
+  return {};
+}
+
+Alarm::TimePoint Alarm::GetNextExpiration() const {
+  return std::chrono::system_clock::now() + m_timeout;
+}
+
+Alarm::Handle Alarm::GetNextUniqueHandle() {
+  static std::atomic<Handle> g_next_handle = 1;
+  return g_next_handle++;
+}
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
index 3961dcf..7b9938d 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
@@ -652,7 +652,7 @@ size_t ProcessMachCore::ReadMemory(addr_t addr, void *buf, size_t size,
                                    Status &error) {
   // Don't allow the caching that lldb_private::Process::ReadMemory does since
   // in core files we have it all cached our our core file anyway.
-  return DoReadMemory(addr, buf, size, error);
+  return DoReadMemory(FixAnyAddress(addr), buf, size, error);
 }
 
 size_t ProcessMachCore::DoReadMemory(addr_t addr, void *buf, size_t size,
diff --git a/lldb/source/Plugins/SymbolLocator/DebugSymbols/SymbolLocatorDebugSymbols.cpp b/lldb/source/Plugins/SymbolLocator/DebugSymbols/SymbolLocatorDebugSymbols.cpp
index f7df465..69595c6 100644
--- a/lldb/source/Plugins/SymbolLocator/DebugSymbols/SymbolLocatorDebugSymbols.cpp
+++ b/lldb/source/Plugins/SymbolLocator/DebugSymbols/SymbolLocatorDebugSymbols.cpp
@@ -1066,11 +1066,21 @@ bool SymbolLocatorDebugSymbols::DownloadObjectAndSymbolFile(
   command << lookup_arg;
 
   // Log and report progress.
+  std::string lookup_desc;
+  if (uuid_ptr && file_spec_ptr)
+    lookup_desc =
+        llvm::formatv("{0} ({1})", file_spec_ptr->GetFilename().GetString(),
+                      uuid_ptr->GetAsString());
+  else if (uuid_ptr)
+    lookup_desc = uuid_ptr->GetAsString();
+  else if (file_spec_ptr)
+    lookup_desc = file_spec_ptr->GetFilename().GetString();
+
   Log *log = GetLog(LLDBLog::Host);
-  LLDB_LOG(log, "Calling {0} with {1} to find dSYM: {2}", dsymForUUID_exe_path,
-           lookup_arg, command.GetString());
+  LLDB_LOG(log, "Calling {0} for {1} to find dSYM: {2}", dsymForUUID_exe_path,
+           lookup_desc, command.GetString());
 
-  Progress progress("Downloading symbol file", lookup_arg);
+  Progress progress("Downloading symbol file for", lookup_desc);
 
   // Invoke dsymForUUID.
   int exit_status = -1;
diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp
index caf3e66..1542c8c 100644
--- a/lldb/source/Target/Language.cpp
+++ b/lldb/source/Target/Language.cpp
@@ -13,6 +13,7 @@
 #include "lldb/Target/Language.h"
 
 #include "lldb/Core/PluginManager.h"
+#include "lldb/Interpreter/OptionValueProperties.h"
 #include "lldb/Symbol/SymbolFile.h"
 #include "lldb/Symbol/TypeList.h"
 #include "lldb/Target/Target.h"
@@ -27,6 +28,35 @@ using namespace lldb_private::formatters;
 typedef std::unique_ptr<Language> LanguageUP;
 typedef std::map<lldb::LanguageType, LanguageUP> LanguagesMap;
 
+#define LLDB_PROPERTIES_language
+#include "TargetProperties.inc"
+
+enum {
+#define LLDB_PROPERTIES_language
+#include "TargetPropertiesEnum.inc"
+};
+
+LanguageProperties &Language::GetGlobalLanguageProperties() {
+  static LanguageProperties g_settings;
+  return g_settings;
+}
+
+llvm::StringRef LanguageProperties::GetSettingName() {
+  static constexpr llvm::StringLiteral g_setting_name("language");
+  return g_setting_name;
+}
+
+LanguageProperties::LanguageProperties() {
+  m_collection_sp = std::make_shared<OptionValueProperties>(GetSettingName());
+  m_collection_sp->Initialize(g_language_properties);
+}
+
+bool LanguageProperties::GetEnableFilterForLineBreakpoints() const {
+  const uint32_t idx = ePropertyEnableFilterForLineBreakpoints;
+  return GetPropertyAtIndexAs<bool>(
+      idx, g_language_properties[idx].default_uint_value != 0);
+}
+
 static LanguagesMap &GetLanguagesMap() {
   static LanguagesMap *g_map = nullptr;
   static llvm::once_flag g_initialize;
diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td
index d2fccdb..7f79218 100644
--- a/lldb/source/Target/TargetProperties.td
+++ b/lldb/source/Target/TargetProperties.td
@@ -311,3 +311,9 @@ let Definition = "thread" in {
     DefaultUnsignedValue<600000>,
     Desc<"Maximum number of frames to backtrace.">;
 }
+
+let Definition = "language" in {
+  def EnableFilterForLineBreakpoints: Property<"enable-filter-for-line-breakpoints", "Boolean">,
+    DefaultTrue,
+    Desc<"If true, allow Language plugins to filter locations when setting breakpoints by line number or regex.">;
+}
diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_options/TestBreakpointOptions.py b/lldb/test/API/functionalities/breakpoint/breakpoint_options/TestBreakpointOptions.py
index 5179ffe..d262b62 100644
--- a/lldb/test/API/functionalities/breakpoint/breakpoint_options/TestBreakpointOptions.py
+++ b/lldb/test/API/functionalities/breakpoint/breakpoint_options/TestBreakpointOptions.py
@@ -95,8 +95,10 @@ class BreakpointOptionsTestCase(TestBase):
         self.expect(
             "breakpoint list -v",
             "Verbose breakpoint list contains mangled names",
+            # The demangled function name is system-dependent, e.g.
+            # 'int ns::func(void)' on Windows and 'ns::func()' on Linux.
             substrs=[
-                "function = ns::func",
+                f"function = {function.GetName()}",
                 f"mangled function = {function.GetMangledName()}",
             ],
         )
diff --git a/lldb/test/API/macosx/tbi-honored/Makefile b/lldb/test/API/macosx/tbi-honored/Makefile
new file mode 100644
index 0000000..1049594
--- /dev/null
+++ b/lldb/test/API/macosx/tbi-honored/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/macosx/tbi-honored/TestTBIHonored.py b/lldb/test/API/macosx/tbi-honored/TestTBIHonored.py
new file mode 100644
index 0000000..a5c0abd
--- /dev/null
+++ b/lldb/test/API/macosx/tbi-honored/TestTBIHonored.py
@@ -0,0 +1,57 @@
+"""Test that lldb on Darwin ignores metadata in the top byte of addresses, both corefile and live."""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestTBIHonored(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    def do_variable_access_tests(self, frame):
+        self.assertEqual(
+            frame.variables["pb"][0]
+            .GetChildMemberWithName("p")
+            .Dereference()
+            .GetValueAsUnsigned(),
+            15,
+        )
+        addr = frame.variables["pb"][0].GetChildMemberWithName("p").GetValueAsUnsigned()
+        # Confirm that there is metadata in the top byte of our pointer
+        self.assertEqual((addr >> 56) & 0xFF, 0xFE)
+        self.expect("expr -- *pb.p", substrs=["15"])
+        self.expect("frame variable *pb.p", substrs=["15"])
+        self.expect("expr -- *(int*)0x%x" % addr, substrs=["15"])
+
+    # This test is valid on AArch64 systems with TBI mode enabled,
+    # and an address mask that clears the top byte before reading
+    # from memory.
+    @skipUnlessDarwin
+    @skipIf(archs=no_match(["arm64", "arm64e"]))
+    @skipIfRemote
+    def test(self):
+        corefile = self.getBuildArtifact("process.core")
+        self.build()
+        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.c")
+        )
+
+        # Test that we can dereference a pointer with TBI data
+        # in a live process.
+        self.do_variable_access_tests(thread.GetFrameAtIndex(0))
+
+        # Create a corefile, delete this process
+        self.runCmd("process save-core -s stack " + corefile)
+        process.Destroy()
+        self.dbg.DeleteTarget(target)
+
+        # Now load the corefile
+        target = self.dbg.CreateTarget("")
+        process = target.LoadCore(corefile)
+        thread = process.GetSelectedThread()
+        self.assertTrue(process.GetSelectedThread().IsValid())
+
+        # Test that we can dereference a pointer with TBI data
+        # in a corefile process.
+        self.do_variable_access_tests(thread.GetFrameAtIndex(0))
diff --git a/lldb/test/API/macosx/tbi-honored/main.c b/lldb/test/API/macosx/tbi-honored/main.c
new file mode 100644
index 0000000..3d7ad0b
--- /dev/null
+++ b/lldb/test/API/macosx/tbi-honored/main.c
@@ -0,0 +1,13 @@
+#include <stdint.h>
+#include <stdio.h>
+union ptrbytes {
+  int *p;
+  uint8_t bytes[8];
+};
+int main() {
+  int c = 15;
+  union ptrbytes pb;
+  pb.p = &c;
+  pb.bytes[7] = 0xfe;
+  printf("%d\n", *pb.p); // break here
+}
diff --git a/lldb/unittests/Host/AlarmTest.cpp b/lldb/unittests/Host/AlarmTest.cpp
new file mode 100644
index 0000000..e589557
--- /dev/null
+++ b/lldb/unittests/Host/AlarmTest.cpp
@@ -0,0 +1,159 @@
+//===-- AlarmTest.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Alarm.h"
+#include "gtest/gtest.h"
+
+#include <chrono>
+#include <thread>
+
+using namespace lldb_private;
+using namespace std::chrono_literals;
+
+// Increase the timeout tenfold when running under ASan as it can have about the
+// same performance overhead.
+#if __has_feature(address_sanitizer)
+static constexpr auto TEST_TIMEOUT = 10000ms;
+#else
+static constexpr auto TEST_TIMEOUT = 1000ms;
+#endif
+
+// The time between scheduling a callback and it getting executed. This should
+// NOT be increased under ASan.
+static constexpr auto ALARM_TIMEOUT = 500ms;
+
+// If there are any pending callbacks, make sure they run before the Alarm
+// object is destroyed.
+static constexpr bool RUN_CALLBACKS_ON_EXIT = true;
+
+TEST(AlarmTest, Create) {
+  std::mutex m;
+
+  std::vector<Alarm::TimePoint> callbacks_actual;
+  std::vector<Alarm::TimePoint> callbacks_expected;
+
+  Alarm alarm(ALARM_TIMEOUT, RUN_CALLBACKS_ON_EXIT);
+
+  // Create 5 alarms some time apart.
+  for (size_t i = 0; i < 5; ++i) {
+    callbacks_actual.emplace_back();
+    callbacks_expected.emplace_back(std::chrono::system_clock::now() +
+                                    ALARM_TIMEOUT);
+
+    alarm.Create([&callbacks_actual, &m, i]() {
+      std::lock_guard guard(m);
+      callbacks_actual[i] = std::chrono::system_clock::now();
+    });
+
+    std::this_thread::sleep_for(ALARM_TIMEOUT / 5);
+  }
+
+  // Leave plenty of time for all the alarms to fire.
+  std::this_thread::sleep_for(TEST_TIMEOUT);
+
+  // Make sure all the alarms fired around the expected time.
+  for (size_t i = 0; i < 5; ++i)
+    EXPECT_GE(callbacks_actual[i], callbacks_expected[i]);
+}
+
+TEST(AlarmTest, Exit) {
+  std::mutex m;
+
+  std::vector<Alarm::Handle> handles;
+  std::vector<bool> callbacks;
+
+  {
+    Alarm alarm(ALARM_TIMEOUT, RUN_CALLBACKS_ON_EXIT);
+
+    // Create 5 alarms.
+    for (size_t i = 0; i < 5; ++i) {
+      callbacks.emplace_back(false);
+
+      handles.push_back(alarm.Create([&callbacks, &m, i]() {
+        std::lock_guard guard(m);
+        callbacks[i] = true;
+      }));
+    }
+
+    // Let the alarm go out of scope before any alarm had a chance to fire.
+  }
+
+  // Make sure none of the alarms fired.
+  for (bool callback : callbacks)
+    EXPECT_TRUE(callback);
+}
+
+TEST(AlarmTest, Cancel) {
+  std::mutex m;
+
+  std::vector<Alarm::Handle> handles;
+  std::vector<bool> callbacks;
+
+  Alarm alarm(ALARM_TIMEOUT, RUN_CALLBACKS_ON_EXIT);
+
+  // Create 5 alarms.
+  for (size_t i = 0; i < 5; ++i) {
+    callbacks.emplace_back(false);
+
+    handles.push_back(alarm.Create([&callbacks, &m, i]() {
+      std::lock_guard guard(m);
+      callbacks[i] = true;
+    }));
+  }
+
+  // Make sure we can cancel the first 4 alarms.
+  for (size_t i = 0; i < 4; ++i)
+    EXPECT_TRUE(alarm.Cancel(handles[i]));
+
+  // Leave plenty of time for all the alarms to fire.
+  std::this_thread::sleep_for(TEST_TIMEOUT);
+
+  // Make sure none of the first 4 alarms fired.
+  for (size_t i = 0; i < 4; ++i)
+    EXPECT_FALSE(callbacks[i]);
+
+  // Make sure the fifth alarm still fired.
+  EXPECT_TRUE(callbacks[4]);
+}
+
+TEST(AlarmTest, Restart) {
+  std::mutex m;
+
+  std::vector<Alarm::Handle> handles;
+  std::vector<Alarm::TimePoint> callbacks_actual;
+  std::vector<Alarm::TimePoint> callbacks_expected;
+
+  Alarm alarm(ALARM_TIMEOUT, RUN_CALLBACKS_ON_EXIT);
+
+  // Create 5 alarms some time apart.
+  for (size_t i = 0; i < 5; ++i) {
+    callbacks_actual.emplace_back();
+    callbacks_expected.emplace_back(std::chrono::system_clock::now() +
+                                    ALARM_TIMEOUT);
+
+    handles.push_back(alarm.Create([&callbacks_actual, &m, i]() {
+      std::lock_guard guard(m);
+      callbacks_actual[i] = std::chrono::system_clock::now();
+    }));
+
+    std::this_thread::sleep_for(ALARM_TIMEOUT / 5);
+  }
+
+  // Update the last 2 alarms.
+  for (size_t i = 3; i < 5; ++i) {
+    callbacks_expected[i] = std::chrono::system_clock::now() + ALARM_TIMEOUT;
+    EXPECT_TRUE(alarm.Restart(handles[i]));
+  }
+
+  // Leave plenty of time for all the alarms to fire.
+  std::this_thread::sleep_for(TEST_TIMEOUT);
+
+  // Make sure all the alarms around the expected time.
+  for (size_t i = 0; i < 5; ++i)
+    EXPECT_GE(callbacks_actual[i], callbacks_expected[i]);
+}
diff --git a/lldb/unittests/Host/CMakeLists.txt b/lldb/unittests/Host/CMakeLists.txt
index c959478..7c09932 100644
--- a/lldb/unittests/Host/CMakeLists.txt
+++ b/lldb/unittests/Host/CMakeLists.txt
@@ -1,4 +1,5 @@
 set (FILES
+  AlarmTest.cpp
   ConnectionFileDescriptorTest.cpp
   FileActionTest.cpp
   FileSystemTest.cpp
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index f6fb56e..5610880 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -2090,7 +2090,7 @@ function(add_lit_testsuites project directory)
 endfunction()
 
 function(llvm_install_library_symlink name dest type)
-  cmake_parse_arguments(ARG "" "COMPONENT;SOVERSION" "" ${ARGN})
+  cmake_parse_arguments(ARG "FULL_DEST" "COMPONENT" "" ${ARGN})
   foreach(path ${CMAKE_MODULE_PATH})
     if(EXISTS ${path}/LLVMInstallSymlink.cmake)
       set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake)
@@ -2104,8 +2104,8 @@ function(llvm_install_library_symlink name dest type)
   endif()
 
   set(full_name ${CMAKE_${type}_LIBRARY_PREFIX}${name}${CMAKE_${type}_LIBRARY_SUFFIX})
-  if (ARG_SOVERSION)
-    set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX}.${ARG_SOVERSION})
+  if (ARG_FULL_DEST)
+    set(full_dest ${dest})
   else()
     set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX})
   endif()
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index fe37e85..29ea500 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1552,6 +1552,25 @@ The AMDGPU backend supports the following calling conventions:
 
      =============================== ==========================================================
 
+AMDGPU MCExpr
+-------------
+
+As part of the AMDGPU MC layer, AMDGPU provides the following target specific
+``MCExpr``\s.
+
+  .. table:: AMDGPU MCExpr types:
+     :name: amdgpu-mcexpr-table
+
+     =================== ================= ========================================================
+     MCExpr              Operands          Return value
+     =================== ================= ========================================================
+     ``max(arg, ...)``   1 or more         Variadic signed operation that returns the maximum
+                                           value of all its arguments.
+
+     ``or(arg, ...)``    1 or more         Variadic signed operation that returns the bitwise-or
+                                           result of all its arguments.
+
+     =================== ================= ========================================================
 
 .. _amdgpu-elf-code-object:
 
diff --git a/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst b/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst
index a78066a..00c070c 100644
--- a/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst
+++ b/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst
@@ -16,7 +16,7 @@ DESCRIPTION
 binary object files and prints their contents in a logical view, which
 is a human readable representation that closely matches the structure
 of the original user source code. Supported object file formats include
-ELF, Mach-O, PDB and COFF.
+ELF, Mach-O, WebAssembly, PDB and COFF.
 
 The **logical view** abstracts the complexity associated with the
 different low-level representations of the debugging information that
@@ -468,8 +468,9 @@ If the <pattern> criteria is too general, a more selective option can
 be specified to target a particular category of elements:
 lines (:option:`--select-lines`), scopes (:option:`--select-scopes`),
 symbols (:option:`--select-symbols`) and types (:option:`--select-types`).
+
 These options require knowledge of the debug information format (DWARF,
-CodeView, COFF), as the given **kind** describes a very specific type
+CodeView), as the given **kind** describes a very specific type
 of element.
 
 LINES
@@ -598,7 +599,7 @@ When comparing logical views created from different debug formats, its
 accuracy depends on how close the debug information represents the
 user code. For instance, a logical view created from a binary file with
 DWARF debug information may include more detailed data than a logical
-view created from a binary file with CodeView/COFF debug information.
+view created from a binary file with CodeView debug information.
 
 The following options describe the elements to compare.
 
@@ -1952,6 +1953,315 @@ The **{Coverage}** and **{Location}** attributes describe the debug
 location and coverage for logical symbols. For optimized code, the
 coverage value decreases and it affects the program debuggability.
 
+WEBASSEMBLY SUPPORT
+~~~~~~~~~~~~~~~~~~~
+The below example is used to show the WebAssembly output generated by
+:program:`llvm-debuginfo-analyzer`. We compiled the example for a
+WebAssembly 32-bit target with Clang (-O0 -g --target=wasm32):
+
+.. code-block:: c++
+
+  1  using INTPTR = const int *;
+  2  int foo(INTPTR ParamPtr, unsigned ParamUnsigned, bool ParamBool) {
+  3    if (ParamBool) {
+  4      typedef int INTEGER;
+  5      const INTEGER CONSTANT = 7;
+  6      return CONSTANT;
+  7    }
+  8    return ParamUnsigned;
+  9  }
+
+PRINT BASIC DETAILS
+^^^^^^^^^^^^^^^^^^^
+The following command prints basic details for all the logical elements
+sorted by the debug information internal offset; it includes its lexical
+level and debug info format.
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level,format
+                          --output-sort=offset
+                          --print=scopes,symbols,types,lines,instructions
+                          test-clang.wasm
+
+or
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level,format
+                          --output-sort=offset
+                          --print=elements
+                          test-clang.wasm
+
+Each row represents an element that is present within the debug
+information. The first column represents the scope level, followed by
+the associated line number (if any), and finally the description of
+the element.
+
+.. code-block:: none
+
+  Logical View:
+  [000]           {File} 'test-clang.wasm' -> WASM
+
+  [001]             {CompileUnit} 'test.cpp'
+  [002]     2         {Function} extern not_inlined 'foo' -> 'int'
+  [003]     2           {Parameter} 'ParamPtr' -> 'INTPTR'
+  [003]     2           {Parameter} 'ParamUnsigned' -> 'unsigned int'
+  [003]     2           {Parameter} 'ParamBool' -> 'bool'
+  [003]                 {Block}
+  [004]     5             {Variable} 'CONSTANT' -> 'const INTEGER'
+  [004]     5             {Line}
+  [004]                   {Code} 'i32.const	7'
+  [004]                   {Code} 'local.set	10'
+  [004]                   {Code} 'local.get	5'
+  [004]                   {Code} 'local.get	10'
+  [004]                   {Code} 'i32.store	12'
+  [004]     6             {Line}
+  [004]                   {Code} 'i32.const	7'
+  [004]                   {Code} 'local.set	11'
+  [004]                   {Code} 'local.get	5'
+  [004]                   {Code} 'local.get	11'
+  [004]                   {Code} 'i32.store	28'
+  [004]                   {Code} 'br      	1'
+  [004]     -             {Line}
+  [004]                   {Code} 'end'
+  [003]     4           {TypeAlias} 'INTEGER' -> 'int'
+  [003]     2           {Line}
+  [003]                 {Code} 'nop'
+  [003]                 {Code} 'end'
+  [003]                 {Code} 'i64.div_s'
+  [003]                 {Code} 'global.get	0'
+  [003]                 {Code} 'local.set	3'
+  [003]                 {Code} 'i32.const	32'
+  [003]                 {Code} 'local.set	4'
+  [003]                 {Code} 'local.get	3'
+  [003]                 {Code} 'local.get	4'
+  [003]                 {Code} 'i32.sub'
+  [003]                 {Code} 'local.set	5'
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'local.get	0'
+  [003]                 {Code} 'i32.store	24'
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'local.get	1'
+  [003]                 {Code} 'i32.store	20'
+  [003]                 {Code} 'local.get	2'
+  [003]                 {Code} 'local.set	6'
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'local.get	6'
+  [003]                 {Code} 'i32.store8	19'
+  [003]     3           {Line}
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'i32.load8_u	19'
+  [003]                 {Code} 'local.set	7'
+  [003]     3           {Line}
+  [003]                 {Code} 'i32.const	1'
+  [003]                 {Code} 'local.set	8'
+  [003]                 {Code} 'local.get	7'
+  [003]                 {Code} 'local.get	8'
+  [003]                 {Code} 'i32.and'
+  [003]                 {Code} 'local.set	9'
+  [003]                 {Code} 'block'
+  [003]                 {Code} 'block'
+  [003]                 {Code} 'local.get	9'
+  [003]                 {Code} 'i32.eqz'
+  [003]                 {Code} 'br_if   	0'
+  [003]     8           {Line}
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'i32.load	20'
+  [003]                 {Code} 'local.set	12'
+  [003]     8           {Line}
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'local.get	12'
+  [003]                 {Code} 'i32.store	28'
+  [003]     -           {Line}
+  [003]                 {Code} 'end'
+  [003]     9           {Line}
+  [003]                 {Code} 'local.get	5'
+  [003]                 {Code} 'i32.load	28'
+  [003]                 {Code} 'local.set	13'
+  [003]                 {Code} 'local.get	13'
+  [003]                 {Code} 'return'
+  [003]                 {Code} 'end'
+  [003]     9           {Line}
+  [003]                 {Code} 'unreachable'
+  [002]     1         {TypeAlias} 'INTPTR' -> '* const int'
+
+SELECT LOGICAL ELEMENTS
+^^^^^^^^^^^^^^^^^^^^^^^
+The following prints all *instructions*, *symbols* and *types* that
+contain **'block'** or **'.store'** in their names or types, using a tab
+layout and given the number of matches.
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level
+                          --select-nocase --select-regex
+                          --select=BLOCK --select=.store
+                          --report=list
+                          --print=symbols,types,instructions,summary
+                          test-clang.wasm
+
+  Logical View:
+  [000]           {File} 'test-clang.wasm'
+
+  [001]           {CompileUnit} 'test.cpp'
+  [003]           {Code} 'block'
+  [003]           {Code} 'block'
+  [004]           {Code} 'i32.store	12'
+  [003]           {Code} 'i32.store	20'
+  [003]           {Code} 'i32.store	24'
+  [004]           {Code} 'i32.store	28'
+  [003]           {Code} 'i32.store	28'
+  [003]           {Code} 'i32.store8	19'
+
+  -----------------------------
+  Element      Total    Printed
+  -----------------------------
+  Scopes           3          0
+  Symbols          4          0
+  Types            2          0
+  Lines           62          8
+  -----------------------------
+  Total           71          8
+
+COMPARISON MODE
+^^^^^^^^^^^^^^^
+Given the previous example we found the above debug information issue
+(related to the previous invalid scope location for the **'typedef int
+INTEGER'**) by comparing against another compiler.
+
+Using GCC to generate test-dwarf-gcc.o, we can apply a selection pattern
+with the printing mode to obtain the following logical view output.
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level
+                          --select-regex --select-nocase --select=INTe
+                          --report=list
+                          --print=symbols,types
+                          test-clang.wasm test-dwarf-gcc.o
+
+  Logical View:
+  [000]           {File} 'test-clang.wasm'
+
+  [001]           {CompileUnit} 'test.cpp'
+  [003]     4     {TypeAlias} 'INTEGER' -> 'int'
+  [004]     5     {Variable} 'CONSTANT' -> 'const INTEGER'
+
+  Logical View:
+  [000]           {File} 'test-dwarf-gcc.o'
+
+  [001]           {CompileUnit} 'test.cpp'
+  [004]     4     {TypeAlias} 'INTEGER' -> 'int'
+  [004]     5     {Variable} 'CONSTANT' -> 'const INTEGER'
+
+The output shows that both objects contain the same elements. But the
+**'typedef INTEGER'** is located at different scope level. The GCC
+generated object, shows **'4'**, which is the correct value.
+
+There are 2 comparison methods: logical view and logical elements.
+
+LOGICAL VIEW
+""""""""""""
+It compares the logical view as a whole unit; for a match, each compared
+logical element must have the same parents and children.
+
+The output shows in view form the **missing (-), added (+)** elements,
+giving more context by swapping the reference and target object files.
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level
+                          --compare=types
+                          --report=view
+                          --print=symbols,types
+                          test-clang.wasm test-dwarf-gcc.o
+
+  Reference: 'test-clang.wasm'
+  Target:    'test-dwarf-gcc.o'
+
+  Logical View:
+   [000]           {File} 'test-clang.wasm'
+
+   [001]             {CompileUnit} 'test.cpp'
+   [002]     1         {TypeAlias} 'INTPTR' -> '* const int'
+   [002]     2         {Function} extern not_inlined 'foo' -> 'int'
+   [003]                 {Block}
+   [004]     5             {Variable} 'CONSTANT' -> 'const INTEGER'
+  +[004]     4             {TypeAlias} 'INTEGER' -> 'int'
+   [003]     2           {Parameter} 'ParamBool' -> 'bool'
+   [003]     2           {Parameter} 'ParamPtr' -> 'INTPTR'
+   [003]     2           {Parameter} 'ParamUnsigned' -> 'unsigned int'
+  -[003]     4           {TypeAlias} 'INTEGER' -> 'int'
+
+The output shows the merging view path (reference and target) with the
+missing and added elements.
+
+LOGICAL ELEMENTS
+""""""""""""""""
+It compares individual logical elements without considering if their
+parents are the same. For both comparison methods, the equal criteria
+includes the name, source code location, type, lexical scope level.
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level
+                          --compare=types
+                          --report=list
+                          --print=symbols,types,summary
+                          test-clang.wasm test-dwarf-gcc.o
+
+  Reference: 'test-clang.wasm'
+  Target:    'test-dwarf-gcc.o'
+
+  (1) Missing Types:
+  -[003]     4     {TypeAlias} 'INTEGER' -> 'int'
+
+  (1) Added Types:
+  +[004]     4     {TypeAlias} 'INTEGER' -> 'int'
+
+  ----------------------------------------
+  Element   Expected    Missing      Added
+  ----------------------------------------
+  Scopes           4          0          0
+  Symbols          0          0          0
+  Types            2          1          1
+  Lines            0          0          0
+  ----------------------------------------
+  Total            6          1          1
+
+Changing the *Reference* and *Target* order:
+
+.. code-block:: none
+
+  llvm-debuginfo-analyzer --attribute=level
+                          --compare=types
+                          --report=list
+                          --print=symbols,types,summary
+                          test-dwarf-gcc.o test-clang.wasm
+
+  Reference: 'test-dwarf-gcc.o'
+  Target:    'test-clang.wasm'
+
+  (1) Missing Types:
+  -[004]     4     {TypeAlias} 'INTEGER' -> 'int'
+
+  (1) Added Types:
+  +[003]     4     {TypeAlias} 'INTEGER' -> 'int'
+
+  ----------------------------------------
+  Element   Expected    Missing      Added
+  ----------------------------------------
+  Scopes           4          0          0
+  Symbols          0          0          0
+  Types            2          1          1
+  Lines            0          0          0
+  ----------------------------------------
+  Total            6          1          1
+
+As the *Reference* and *Target* are switched, the *Added Types* from
+the first case now are listed as *Missing Types*.
+
 EXIT STATUS
 -----------
 :program:`llvm-debuginfo-analyzer` returns 0 if the input files were
diff --git a/llvm/docs/PointerAuth.md b/llvm/docs/PointerAuth.md
index 41266b4..a8d2b4d 100644
--- a/llvm/docs/PointerAuth.md
+++ b/llvm/docs/PointerAuth.md
@@ -10,6 +10,9 @@ Before the pointer is used, it needs to be authenticated, i.e., have its
 signature checked.  This prevents pointer values of unknown origin from being
 used to replace the signed pointer value.
 
+For more details, see the clang documentation page for
+[Pointer Authentication](https://clang.llvm.org/docs/PointerAuthentication.html).
+
 At the IR level, it is represented using:
 
 * a [set of intrinsics](#intrinsics) (to sign/authenticate pointers)
diff --git a/llvm/docs/RemoveDIsDebugInfo.md b/llvm/docs/RemoveDIsDebugInfo.md
index a985773..df66c26 100644
--- a/llvm/docs/RemoveDIsDebugInfo.md
+++ b/llvm/docs/RemoveDIsDebugInfo.md
@@ -24,7 +24,7 @@ The debug records are not instructions, do not appear in the instruction list, a
 
 # Great, what do I need to do!
 
-Approximately nothing -- we've already instrumented all of LLVM to handle these new records ("`DPValues`") and behave identically to past LLVM behaviour. We plan on turning this on by default some time soon, with IR converted to the intrinsic form of debug info at terminals (textual IR, bitcode) for a short while, before then changing the textual IR and bitcode formats.
+Approximately nothing -- we've already instrumented all of LLVM to handle these new records ("`DbgRecords`") and behave identically to past LLVM behaviour. We plan on turning this on by default some time soon, with IR converted to the intrinsic form of debug info at terminals (textual IR, bitcode) for a short while, before then changing the textual IR and bitcode formats.
 
 There are two significant changes to be aware of. Firstly, we're adding a single bit of debug relevant data to the `BasicBlock::iterator` class (it's so that we can determine whether ranges intend on including debug info at the beginning of a block or not). That means when writing passes that insert LLVM IR instructions, you need to identify positions with `BasicBlock::iterator` rather than just a bare `Instruction *`. Most of the time this means that after identifying where you intend on inserting something, you must also call `getIterator` on the instruction position -- however when inserting at the start of a block you _must_ use `getFirstInsertionPt`, `getFirstNonPHIIt` or `begin` and use that iterator to insert, rather than just fetching a pointer to the first instruction.
 
@@ -40,13 +40,15 @@ This will all happen transparently without needing to think about it!
 
 ## What exactly have you replaced debug intrinsics with?
 
-We're using a dedicated C++ class called `DPValue` to store debug info, with a one-to-one relationship between each instance of a debug intrinsic and each `DPValue` object in any LLVM IR program. This class stores exactly the same information as is stored in debugging intrinsics. It also has almost entirely the same set of methods, that behave in the same way:
+We're using a dedicated C++ class called `DbgRecord` to store debug info, with a one-to-one relationship between each instance of a debug intrinsic and each `DbgRecord` object in any LLVM IR program; these `DbgRecord`s are represented in the IR as non-instruction debug records, as described in the [Source Level Debugging](project:SourceLevelDebugging.rst#Debug Records) document. This class has a set of subclasses that store exactly the same information as is stored in debugging intrinsics. Each one also has almost entirely the same set of methods, that behave in the same way:
 
+  https://llvm.org/docs/doxygen/classllvm_1_1DbgRecord.html
   https://llvm.org/docs/doxygen/classllvm_1_1DPValue.html
+  https://llvm.org/docs/doxygen/classllvm_1_1DPLabel.html
 
-This allows you to treat a `DPValue` as if it's a `dbg.value` intrinsic most of the time, for example in generic (auto-param) lambdas.
+This allows you to treat a `DPValue` as if it's a `dbg.value`/`dbg.declare`/`dbg.assign` intrinsic most of the time, for example in generic (auto-param) lambdas, and the same for `DPLabel` and `dbg.label`s.
 
-## How do these DPValues fit into the instruction stream?
+## How do these `DbgRecords` fit into the instruction stream?
 
 Like so:
 
@@ -60,21 +62,21 @@ Like so:
                          |
                          v
                   +------------+
-            <-----+  DPMarker  |<----
-           /      +------------+     \
-          /                           \
-         /                             \
-        v                               ^
- +-----------+    +-----------+   +-----------+
- |  DPValue  +--->|  DPValue  +-->|  DPValue  |
- +-----------+    +-----------+   +-----------+
+          <-------+  DPMarker  |<-------
+         /        +------------+        \
+        /                                \
+       /                                  \
+      v                                    ^
+ +-------------+    +-------------+   +-------------+
+ |  DbgRecord  +--->|  DbgRecord  +-->|  DbgRecord  |
+ +-------------+    +-------------+   +-------------+
 ```
 
-Each instruction has a pointer to a `DPMarker` (which will become optional), that contains a list of `DPValue` objects. No debugging records appear in the instruction list at all. `DPValue`s have a parent pointer to their owning `DPMarker`, and each `DPMarker` has a pointer back to it's owning instruction.
+Each instruction has a pointer to a `DPMarker` (which will become optional), that contains a list of `DbgRecord` objects. No debugging records appear in the instruction list at all. `DbgRecord`s have a parent pointer to their owning `DPMarker`, and each `DPMarker` has a pointer back to it's owning instruction.
 
-Not shown are the links from DPValues to other parts of the `Value`/`Metadata` hierachy: `DPValue`s have raw pointers to `DILocalVariable`, `DIExpression` and `DILocation` objects, and references to `Value`s are stored in a `DebugValueUser` base class. This refers to a `ValueAsMetadata` object referring to `Value`s, via the `TrackingMetadata` facility.
+Not shown are the links from DbgRecord to other parts of the `Value`/`Metadata` hierachy: `DbgRecord` subclasses have tracking pointers to the DIMetadata that they use, and `DPValue` has references to `Value`s that are stored in a `DebugValueUser` base class. This refers to a `ValueAsMetadata` object referring to `Value`s, via the `TrackingMetadata` facility.
 
-The various kinds of debug intrinsic (value, declare, assign) are all stored in the `DPValue` object, with a "Type" field disamgibuating which is which.
+The various kinds of debug intrinsic (value, declare, assign, label) are all stored in `DbgRecord` subclasses, with a "RecordKind" field distinguishing `DPLabel`s from `DPValue`s, and a `LocationType` field in the `DPValue` class further disambiguating the various debug variable intrinsics it can represent.
 
 ## Finding debug info records
 
@@ -82,15 +84,15 @@ Utilities such as `findDbgUsers` and the like now have an optional argument that
 
 ## Examining debug info records at positions
 
-Call `Instruction::getDbgRecordRange()` to get the range of `DPValue` objects that are attached to an instruction.
+Call `Instruction::getDbgRecordRange()` to get the range of `DbgRecord` objects that are attached to an instruction.
 
 ## Moving around, deleting
 
-You can use `DPValue::removeFromParent` to unlink a `DPValue` from it's marker, and then `BasicBlock::insertDbgRecordBefore` or `BasicBlock::insertDbgRecordAfter` to re-insert the `DPValue` somewhere else. You cannot insert a `DPValue` at an arbitary point in a list of `DPValue`s (if you're doing this with `dbg.value`s then it's unlikely to be correct).
+You can use `DbgRecord::removeFromParent` to unlink a `DbgRecord` from it's marker, and then `BasicBlock::insertDbgRecordBefore` or `BasicBlock::insertDbgRecordAfter` to re-insert the `DbgRecord` somewhere else. You cannot insert a `DbgRecord` at an arbitary point in a list of `DbgRecord`s (if you're doing this with `dbg.value`s then it's unlikely to be correct).
 
-Erase `DPValue`s by calling `eraseFromParent` or `deleteInstr` if it's already been removed.
+Erase `DbgRecord`s by calling `eraseFromParent` or `deleteInstr` if it's already been removed.
 
-## What about dangling `DPValue`s?
+## What about dangling `DbgRecord`s?
 
 If you have a block like so:
 
@@ -101,6 +103,6 @@ If you have a block like so:
       br label %xyzzy
 ```
 
-your optimisation pass may wish to erase the terminator and then do something to the block. This is easy to do when debug info is kept in instructions, but with `DPValue`s there is no trailing instruction to attach the variable information to in the block above, once the terminator is erased. For such degenerate blocks, `DPValue`s are stored temporarily in a map in `LLVMContext`, and are re-inserted when a terminator is reinserted to the block or other instruction inserted at `end()`.
+your optimisation pass may wish to erase the terminator and then do something to the block. This is easy to do when debug info is kept in instructions, but with `DbgRecord`s there is no trailing instruction to attach the variable information to in the block above, once the terminator is erased. For such degenerate blocks, `DbgRecord`s are stored temporarily in a map in `LLVMContext`, and are re-inserted when a terminator is reinserted to the block or other instruction inserted at `end()`.
 
 This can technically lead to trouble in the vanishingly rare scenario where an optimisation pass erases a terminator and then decides to erase the whole block. (We recommend not doing that).
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 2eeb076..155cb33 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -181,7 +181,7 @@ Optimizations
 
 :doc:`RemoveDIsDebugInfo`
    This is a migration guide describing how to move from debug info using
-   intrinsics such as dbg.value to using the non-instruction DPValue object.
+   intrinsics such as dbg.value to using the non-instruction DbgRecord object.
 
 :doc:`InstrProfileFormat`
    This document explains two binary formats of instrumentation-based profiles.
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index bea3e28..1abea9e 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -997,6 +997,12 @@ public:
   APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
   APInt ushl_ov(unsigned Amt, bool &Overflow) const;
 
+  /// Signed integer floor division operation.
+  ///
+  /// Rounds towards negative infinity, i.e. 5 / -2 = -3. Iff minimum value
+  /// divided by -1 set Overflow to true.
+  APInt sfloordiv_ov(const APInt &RHS, bool &Overflow) const;
+
   // Operations that saturate
   APInt sadd_sat(const APInt &RHS) const;
   APInt uadd_sat(const APInt &RHS) const;
@@ -2198,6 +2204,18 @@ inline const APInt abdu(const APInt &A, const APInt &B) {
   return A.uge(B) ? (A - B) : (B - A);
 }
 
+/// Compute the floor of the signed average of C1 and C2
+APInt avgFloorS(const APInt &C1, const APInt &C2);
+
+/// Compute the floor of the unsigned average of C1 and C2
+APInt avgFloorU(const APInt &C1, const APInt &C2);
+
+/// Compute the ceil of the signed average of C1 and C2
+APInt avgCeilS(const APInt &C1, const APInt &C2);
+
+/// Compute the ceil of the unsigned average of C1 and C2
+APInt avgCeilU(const APInt &C1, const APInt &C2);
+
 /// Compute GCD of two unsigned APInt values.
 ///
 /// This function returns the greatest common divisor of the two APInt values
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index fd885a8..c54b1e8 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -22,6 +22,11 @@
 #include <stdint.h>
 
 namespace llvm {
+
+namespace Intrinsic {
+using ID = unsigned;
+}
+
 class APInt;
 template <typename T> class ArrayRef;
 class CallBase;
@@ -187,6 +192,10 @@ Constant *ConstantFoldCall(const CallBase *Call, Function *F,
                            ArrayRef<Constant *> Operands,
                            const TargetLibraryInfo *TLI = nullptr);
 
+Constant *ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
+                                      Constant *RHS, Type *Ty,
+                                      Instruction *FMFSource);
+
 /// ConstantFoldLoadThroughBitcast - try to cast constant to destination type
 /// returning null if unsuccessful. Can cast pointer to pointer or pointer to
 /// integer and vice versa if their sizes are equal.
diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index 23e2ea8..8a3269d 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -117,6 +117,12 @@ public:
     return simplifyCastInst(Op, V, DestTy, SQ);
   }
 
+  Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
+                             Instruction *FMFSource) const override {
+    return simplifyBinaryIntrinsic(ID, Ty, LHS, RHS, SQ,
+                                   dyn_cast_if_present<CallBase>(FMFSource));
+  }
+
   //===--------------------------------------------------------------------===//
   // Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index a29955a..03d7ad12 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -186,6 +186,11 @@ Value *simplifyExtractElementInst(Value *Vec, Value *Idx,
 Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
                         const SimplifyQuery &Q);
 
+/// Given operands for a BinaryIntrinsic, fold the result or return null.
+Value *simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, Value *Op0,
+                               Value *Op1, const SimplifyQuery &Q,
+                               const CallBase *Call);
+
 /// Given operands for a ShuffleVectorInst, fold the result or return null.
 /// See class ShuffleVectorInst for a description of the mask representation.
 Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef<int> Mask,
diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index 978e100..b4105ad 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -191,6 +191,15 @@ public:
     return nullptr;
   }
 
+  Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
+                             Instruction *FMFSource) const override {
+    auto *C1 = dyn_cast<Constant>(LHS);
+    auto *C2 = dyn_cast<Constant>(RHS);
+    if (C1 && C2)
+      return ConstantFoldBinaryIntrinsic(ID, C1, C2, Ty, FMFSource);
+    return nullptr;
+  }
+
   //===--------------------------------------------------------------------===//
   // Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4eab357..c43a1b5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1247,13 +1247,16 @@ public:
   /// cases or optimizations based on those values.
   /// \p CxtI is the optional original context instruction, if one exists, to
   /// provide even more information.
+  /// \p TLibInfo is used to search for platform specific vector library
+  /// functions for instructions that might be converted to calls (e.g. frem).
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
       TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
-      const Instruction *CxtI = nullptr) const;
+      const Instruction *CxtI = nullptr,
+      const TargetLibraryInfo *TLibInfo = nullptr) const;
 
   /// Returns the cost estimation for alternating opcode pattern that can be
   /// lowered to a single instruction on the target. In X86 this is for the
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index c0a52d6..39303e6 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -624,6 +624,17 @@ enum FunctionCodes {
                                   //             operation, align, vol,
                                   //             ordering, synchscope]
   FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...]
+
+  FUNC_CODE_DEBUG_RECORD_VALUE =
+      61, // [DILocation, DILocalVariable, DIExpression, ValueAsMetadata]
+  FUNC_CODE_DEBUG_RECORD_DECLARE =
+      62, // [DILocation, DILocalVariable, DIExpression, ValueAsMetadata]
+  FUNC_CODE_DEBUG_RECORD_ASSIGN =
+      63, // [DILocation, DILocalVariable, DIExpression, ValueAsMetadata,
+          //  DIAssignID, DIExpression (addr), ValueAsMetadata (addr)]
+  FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE =
+      64, // [DILocation, DILocalVariable, DIExpression, Value]
+  FUNC_CODE_DEBUG_RECORD_LABEL = 65, // [DILocation, DILabel]
 };
 
 enum UseListCodes {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 2372863..9e8fc5d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -697,10 +697,6 @@ public:
   bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   /// Match:
-  /// (G_*ADDO x, 0) -> x + no carry out
-  bool matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo);
-
-  /// Match:
   /// (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
   /// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
   bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo);
@@ -810,12 +806,15 @@ public:
   /// Combine selects.
   bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
 
-  /// Combine ands,
+  /// Combine ands.
   bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
 
-  /// Combine ors,
+  /// Combine ors.
   bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Combine addos.
+  bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -919,6 +918,7 @@ private:
   bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
   bool isConstantSplatVector(Register Src, int64_t SplatValue,
                              bool AllowUndefs);
+  bool isConstantOrConstantVectorI(Register Src) const;
 
   std::optional<APInt> getConstantOrConstantSplatVector(Register Src);
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index f5a6528..6b03703 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -359,6 +359,8 @@ public:
   Register getCarryOutReg() const { return getReg(1); }
   MachineOperand &getLHS() { return getOperand(2); }
   MachineOperand &getRHS() { return getOperand(3); }
+  Register getLHSReg() const { return getOperand(2).getReg(); }
+  Register getRHSReg() const { return getOperand(3).getReg(); }
 
   static bool classof(const MachineInstr *MI) {
     switch (MI->getOpcode()) {
@@ -429,6 +431,23 @@ public:
   }
 };
 
+/// Represents overflowing add operations.
+/// G_UADDO, G_SADDO
+class GAddCarryOut : public GBinOpCarryOut {
+public:
+  bool isSigned() const { return getOpcode() == TargetOpcode::G_SADDO; }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_UADDO:
+    case TargetOpcode::G_SADDO:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
 /// Represents overflowing add/sub operations that also consume a carry-in.
 /// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
 class GAddSubCarryInOut : public GAddSubCarryOut {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 29f675b..cea0e0e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -243,6 +243,14 @@ private:
   bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
                         unsigned Opcode);
 
+  // Translate @llvm.experimental.vector.interleave2 and
+  // @llvm.experimental.vector.deinterleave2 intrinsics for fixed-width vector
+  // types into vector shuffles.
+  bool translateVectorInterleave2Intrinsic(const CallInst &CI,
+                                           MachineIRBuilder &MIRBuilder);
+  bool translateVectorDeinterleave2Intrinsic(const CallInst &CI,
+                                             MachineIRBuilder &MIRBuilder);
+
   void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
 
   bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index 445c9b1..967c4a7 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -89,6 +89,9 @@ public:
 private:
   friend class LoopInfoBase<MachineBasicBlock, MachineLoop>;
 
+  /// Returns true if the given physreg has no defs inside the loop.
+  bool isLoopInvariantImplicitPhysReg(Register Reg) const;
+
   explicit MachineLoop(MachineBasicBlock *MBB)
     : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {}
 
diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h
index 7713c55..b1166e3a 100644
--- a/llvm/include/llvm/CodeGen/MachinePassManager.h
+++ b/llvm/include/llvm/CodeGen/MachinePassManager.h
@@ -72,6 +72,7 @@ template <typename PassT> struct MachinePassModel : MachinePassConcept {
     return *this;
   }
 
+  MachinePassModel &operator=(const MachinePassModel &) = delete;
   PreservedAnalyses run(MachineFunction &IR,
                         MachineFunctionAnalysisManager &AM) override {
     return Pass.run(IR, AM);
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index a86c740..96ab6b1 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -511,6 +511,26 @@ inline BinaryOpc_match<LHS, RHS, true> m_Xor(const LHS &L, const RHS &R) {
 }
 
 template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) {
+  return BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS, true> m_SMax(const LHS &L, const RHS &R) {
+  return BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS, true> m_UMin(const LHS &L, const RHS &R) {
+  return BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS, true> m_UMax(const LHS &L, const RHS &R) {
+  return BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R);
+}
+
+template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS, false> m_UDiv(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS, false>(ISD::UDIV, L, R);
 }
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index e7c9ecd..117d3f7 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -572,6 +572,12 @@ public:
     return false;
   }
 
+  /// Returns true if MachineLoopInfo should analyze the given physreg
+  /// for loop invariance.
+  virtual bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const {
+    return false;
+  }
+
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that
   /// have call sequences where a GOT register may be updated by the caller
diff --git a/llvm/include/llvm/CodeGenTypes/LowLevelType.h b/llvm/include/llvm/CodeGenTypes/LowLevelType.h
index 5a16cff..62ee28c 100644
--- a/llvm/include/llvm/CodeGenTypes/LowLevelType.h
+++ b/llvm/include/llvm/CodeGenTypes/LowLevelType.h
@@ -45,6 +45,14 @@ public:
                /*AddressSpace=*/0};
   }
 
+  /// Get a low-level token; just a scalar with zero bits (or no size).
+  static constexpr LLT token() {
+    return LLT{/*isPointer=*/false, /*isVector=*/false,
+               /*isScalar=*/true,   ElementCount::getFixed(0),
+               /*SizeInBits=*/0,
+               /*AddressSpace=*/0};
+  }
+
   /// Get a low-level pointer in the given address space.
   static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
     assert(SizeInBits > 0 && "invalid pointer size");
@@ -134,17 +142,17 @@ public:
 
   explicit LLT(MVT VT);
 
-  constexpr bool isValid() const { return IsScalar || IsPointer || IsVector; }
-
+  constexpr bool isValid() const { return IsScalar || RawData != 0; }
   constexpr bool isScalar() const { return IsScalar; }
-
-  constexpr bool isPointer() const { return IsPointer && !IsVector; }
-
-  constexpr bool isPointerVector() const { return IsPointer && IsVector; }
-
-  constexpr bool isPointerOrPointerVector() const { return IsPointer; }
-
-  constexpr bool isVector() const { return IsVector; }
+  constexpr bool isToken() const { return IsScalar && RawData == 0; };
+  constexpr bool isVector() const { return isValid() && IsVector; }
+  constexpr bool isPointer() const {
+    return isValid() && IsPointer && !IsVector;
+  }
+  constexpr bool isPointerVector() const { return IsPointer && isVector(); }
+  constexpr bool isPointerOrPointerVector() const {
+    return IsPointer && isValid();
+  }
 
   /// Returns the number of elements in a vector LLT. Must only be called on
   /// vector types.
@@ -314,6 +322,28 @@ private:
   /// described in static const *Field variables. Each of these variables
   /// is a 2-element array, with the first element describing the bitfield size
   /// and the second element describing the bitfield offset.
+  ///
+  /// +--------+---------+--------+----------+----------------------+
+  /// |isScalar|isPointer|isVector| RawData  |Notes                 |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   0    |    0    |   0    |    0     |Invalid               |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   0    |    0    |   1    |    0     |Tombstone Key         |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   0    |    1    |   0    |    0     |Empty Key             |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   1    |    0    |   0    |    0     |Token                 |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   1    |    0    |   0    | non-zero |Scalar                |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   0    |    1    |   0    | non-zero |Pointer               |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   0    |    0    |   1    | non-zero |Vector of non-pointer |
+  /// +--------+---------+--------+----------+----------------------+
+  /// |   0    |    1    |   1    | non-zero |Vector of pointer     |
+  /// +--------+---------+--------+----------+----------------------+
+  ///
+  /// Everything else is reserved.
   typedef int BitFieldInfo[2];
   ///
   /// This is how the bitfields are packed per Kind:
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
index a66cf46..f76f2ec 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
@@ -122,6 +122,48 @@ protected:
   std::unique_ptr<MCContext> MC;
   std::unique_ptr<MCInstPrinter> MIP;
 
+  // https://yurydelendik.github.io/webassembly-dwarf/
+  // 2. Consuming and Generating DWARF for WebAssembly Code
+  // Note: Some DWARF constructs don't map one-to-one onto WebAssembly
+  // constructs. We strive to enumerate and resolve any ambiguities here.
+  //
+  // 2.1. Code Addresses
+  // Note: DWARF associates various bits of debug info
+  // with particular locations in the program via its code address (instruction
+  // pointer or PC). However, WebAssembly's linear memory address space does not
+  // contain WebAssembly instructions.
+  //
+  // Wherever a code address (see 2.17 of [DWARF]) is used in DWARF for
+  // WebAssembly, it must be the offset of an instruction relative within the
+  // Code section of the WebAssembly file. The DWARF is considered malformed if
+  // a PC offset is between instruction boundaries within the Code section.
+  //
+  // Note: It is expected that a DWARF consumer does not know how to decode
+  // WebAssembly instructions. The instruction pointer is selected as the offset
+  // in the binary file of the first byte of the instruction, and it is
+  // consistent with the WebAssembly Web API conventions definition of the code
+  // location.
+  //
+  // EXAMPLE: .DEBUG_LINE INSTRUCTION POINTERS
+  // The .debug_line DWARF section maps instruction pointers to source
+  // locations. With WebAssembly, the .debug_line section maps Code
+  // section-relative instruction offsets to source locations.
+  //
+  // EXAMPLE: DW_AT_* ATTRIBUTES
+  // For entities with a single associated code address, DWARF uses
+  // the DW_AT_low_pc attribute to specify the associated code address value.
+  // For WebAssembly, the DW_AT_low_pc's value is a Code section-relative
+  // instruction offset.
+  //
+  // For entities with a single contiguous range of code, DWARF uses a
+  // pair of DW_AT_low_pc and DW_AT_high_pc attributes to specify the associated
+  // contiguous range of code address values. For WebAssembly, these attributes
+  // are Code section-relative instruction offsets.
+  //
+  // For entities with multiple ranges of code, DWARF uses the DW_AT_ranges
+  // attribute, which refers to the array located at the .debug_ranges section.
+  LVAddress WasmCodeSectionOffset = 0;
+
   // Loads all info for the architecture of the provided object file.
   Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures);
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index e928faf..2ffde1f 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -60,9 +60,9 @@ public:
 
     struct BuildVersionOpts {
 
-      // Derive platform from triple.
-      static BuildVersionOpts fromTriple(const Triple &TT, uint32_t MinOS,
-                                         uint32_t SDK);
+      // Derive platform from triple if possible.
+      static std::optional<BuildVersionOpts>
+      fromTriple(const Triple &TT, uint32_t MinOS, uint32_t SDK);
 
       uint32_t Platform; // Platform.
       uint32_t MinOS;    // X.Y.Z is encoded in nibbles xxxx.yy.zz
@@ -73,13 +73,12 @@ public:
     /// will be used.
     std::optional<Dylib> IDDylib;
 
-    /// Override for LC_BUILD_VERSION. If this is nullopt then
-    std::optional<BuildVersionOpts> BuildVersion;
-
     /// List of LC_LOAD_DYLIBs.
     std::vector<Dylib> LoadDylibs;
     /// List of LC_RPATHs.
     std::vector<std::string> RPaths;
+    /// List of LC_BUILD_VERSIONs.
+    std::vector<BuildVersionOpts> BuildVersions;
 
     HeaderOptions() = default;
     HeaderOptions(Dylib D) : IDDylib(std::move(D)) {}
diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h
index 1ef32bc..152f781 100644
--- a/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/llvm/include/llvm/IR/AutoUpgrade.h
@@ -88,9 +88,6 @@ namespace llvm {
   /// info. Return true if module is modified.
   bool UpgradeDebugInfo(Module &M);
 
-  /// Copies module attributes to the functions in the module.
-  void CopyModuleAttrToFunctions(Module &M);
-
   /// Check whether a string looks like an old loop attachment tag.
   inline bool mayBeOldLoopAttachmentTag(StringRef Name) {
     return Name.starts_with("llvm.vectorizer.");
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index c2b30a6..3e74a56 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -18,8 +18,8 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/ConstantFold.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/IRBuilderFolder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Operator.h"
@@ -89,7 +89,7 @@ public:
   }
 
   Value *FoldUnOpFMF(Instruction::UnaryOps Opc, Value *V,
-                      FastMathFlags FMF) const override {
+                     FastMathFlags FMF) const override {
     if (Constant *C = dyn_cast<Constant>(V))
       return ConstantFoldUnaryInstruction(Opc, C);
     return nullptr;
@@ -183,6 +183,12 @@ public:
     return nullptr;
   }
 
+  Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
+                             Instruction *FMFSource) const override {
+    // Use TargetFolder or InstSimplifyFolder instead.
+    return nullptr;
+  }
+
   //===--------------------------------------------------------------------===//
   // Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h
index 1afc925..971c22f 100644
--- a/llvm/include/llvm/IR/DebugProgramInstruction.h
+++ b/llvm/include/llvm/IR/DebugProgramInstruction.h
@@ -511,20 +511,20 @@ public:
   void print(raw_ostream &O, bool IsForDebug = false) const;
   void print(raw_ostream &ROS, ModuleSlotTracker &MST, bool IsForDebug) const;
 
-  /// Filter the DbgRecord range to DPValue types only and downcast.
-  static inline auto
-  filter(iterator_range<simple_ilist<DbgRecord>::iterator> R) {
-    return map_range(
-        make_filter_range(R, [](DbgRecord &E) { return isa<DPValue>(E); }),
-        [](DbgRecord &E) { return std::ref(cast<DPValue>(E)); });
-  }
-
   /// Support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const DbgRecord *E) {
     return E->getRecordKind() == ValueKind;
   }
 };
 
+/// Filter the DbgRecord range to DPValue types only and downcast.
+static inline auto
+filterDbgVars(iterator_range<simple_ilist<DbgRecord>::iterator> R) {
+  return map_range(
+      make_filter_range(R, [](DbgRecord &E) { return isa<DPValue>(E); }),
+      [](DbgRecord &E) { return std::ref(cast<DPValue>(E)); });
+}
+
 /// Per-instruction record of debug-info. If an Instruction is the position of
 /// some debugging information, it points at a DPMarker storing that info. Each
 /// marker points back at the instruction that owns it. Various utilities are
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index f292231..c07ffea 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -962,9 +962,9 @@ public:
 
   /// Create a call to intrinsic \p ID with 2 operands which is mangled on the
   /// first type.
-  CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS,
-                                  Instruction *FMFSource = nullptr,
-                                  const Twine &Name = "");
+  Value *CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS,
+                               Instruction *FMFSource = nullptr,
+                               const Twine &Name = "");
 
   /// Create a call to intrinsic \p ID with \p Args, mangled using \p Types. If
   /// \p FMFSource is provided, copy fast-math-flags from that instruction to
@@ -983,7 +983,7 @@ public:
                             const Twine &Name = "");
 
   /// Create call to the minnum intrinsic.
-  CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (IsFPConstrained) {
       return CreateConstrainedFPUnroundedBinOp(
           Intrinsic::experimental_constrained_minnum, LHS, RHS, nullptr, Name);
@@ -993,7 +993,7 @@ public:
   }
 
   /// Create call to the maxnum intrinsic.
-  CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (IsFPConstrained) {
       return CreateConstrainedFPUnroundedBinOp(
           Intrinsic::experimental_constrained_maxnum, LHS, RHS, nullptr, Name);
@@ -1003,19 +1003,19 @@ public:
   }
 
   /// Create call to the minimum intrinsic.
-  CallInst *CreateMinimum(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateMinimum(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateBinaryIntrinsic(Intrinsic::minimum, LHS, RHS, nullptr, Name);
   }
 
   /// Create call to the maximum intrinsic.
-  CallInst *CreateMaximum(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateMaximum(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
   }
 
   /// Create call to the copysign intrinsic.
-  CallInst *CreateCopySign(Value *LHS, Value *RHS,
-                           Instruction *FMFSource = nullptr,
-                           const Twine &Name = "") {
+  Value *CreateCopySign(Value *LHS, Value *RHS,
+                        Instruction *FMFSource = nullptr,
+                        const Twine &Name = "") {
     return CreateBinaryIntrinsic(Intrinsic::copysign, LHS, RHS, FMFSource,
                                  Name);
   }
diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h
index bd2324d..3020f26 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -73,6 +73,10 @@ public:
   virtual Value *FoldCast(Instruction::CastOps Op, Value *V,
                           Type *DestTy) const = 0;
 
+  virtual Value *
+  FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
+                      Instruction *FMFSource = nullptr) const = 0;
+
   //===--------------------------------------------------------------------===//
   // Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 7229292..00536c7 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -29,12 +29,15 @@ def int_dx_dot :
 
 def int_dx_frac  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 
-def int_dx_lerp :
-    Intrinsic<[LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
-    [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>,LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
+def int_dx_isinf :
+    DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+    [llvm_anyfloat_ty]>;
+
+def int_dx_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], 
     [IntrNoMem, IntrWillReturn] >;
 
 def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 def int_dx_rcp  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+def int_dx_rsqrt  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 }
diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index a612f98..7bb5d5e 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -112,6 +112,11 @@ public:
     return nullptr;
   }
 
+  Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
+                             Instruction *FMFSource) const override {
+    return nullptr;
+  }
+
   //===--------------------------------------------------------------------===//
   // Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 487ae17..382009d 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -68,6 +68,22 @@ template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
   return SubPattern;
 }
 
+template <typename SubPattern_t> struct AllowReassoc_match {
+  SubPattern_t SubPattern;
+
+  AllowReassoc_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    auto *I = dyn_cast<FPMathOperator>(V);
+    return I && I->hasAllowReassoc() && SubPattern.match(I);
+  }
+};
+
+template <typename T>
+inline AllowReassoc_match<T> m_AllowReassoc(const T &SubPattern) {
+  return SubPattern;
+}
+
 template <typename Class> struct class_match {
   template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
 };
diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h
index 11c3b88..3bf4491 100644
--- a/llvm/include/llvm/MC/MCSymbolXCOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h
@@ -26,6 +26,8 @@ public:
 
   static bool classof(const MCSymbol *S) { return S->isXCOFF(); }
 
+  enum CodeModel : uint8_t { CM_Small, CM_Large };
+
   static StringRef getUnqualifiedName(StringRef Name) {
     if (Name.back() == ']') {
       StringRef Lhs, Rhs;
@@ -72,8 +74,22 @@ public:
 
   void setEHInfo() const { modifyFlags(SF_EHInfo, SF_EHInfo); }
 
+  bool hasPerSymbolCodeModel() const { return PerSymbolCodeModel.has_value(); }
+
+  CodeModel getPerSymbolCodeModel() const {
+    assert(hasPerSymbolCodeModel() &&
+           "Requested code model for symbol without one");
+    return *PerSymbolCodeModel;
+  }
+
+  void setPerSymbolCodeModel(MCSymbolXCOFF::CodeModel Model) {
+    PerSymbolCodeModel = Model;
+  }
+
 private:
   std::optional<XCOFF::StorageClass> StorageClass;
+  std::optional<CodeModel> PerSymbolCodeModel;
+
   MCSectionXCOFF *RepresentedCsect = nullptr;
   XCOFF::VisibilityType VisibilityType = XCOFF::SYM_V_UNSPECIFIED;
   StringRef SymbolTableName;
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index c9227da..1e73de9 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -389,25 +389,35 @@ protected:
   }
 
   Error getBuildAttributes(ELFAttributeParser &Attributes) const override {
+    uint32_t Type;
+    switch (getEMachine()) {
+    case ELF::EM_ARM:
+      Type = ELF::SHT_ARM_ATTRIBUTES;
+      break;
+    case ELF::EM_RISCV:
+      Type = ELF::SHT_RISCV_ATTRIBUTES;
+      break;
+    default:
+      return Error::success();
+    }
+
     auto SectionsOrErr = EF.sections();
     if (!SectionsOrErr)
       return SectionsOrErr.takeError();
-
     for (const Elf_Shdr &Sec : *SectionsOrErr) {
-      if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES ||
-          Sec.sh_type == ELF::SHT_RISCV_ATTRIBUTES) {
-        auto ErrorOrContents = EF.getSectionContents(Sec);
-        if (!ErrorOrContents)
-          return ErrorOrContents.takeError();
-
-        auto Contents = ErrorOrContents.get();
-        if (Contents[0] != ELFAttrs::Format_Version || Contents.size() == 1)
-          return Error::success();
-
-        if (Error E = Attributes.parse(Contents, ELFT::TargetEndianness))
-          return E;
-        break;
-      }
+      if (Sec.sh_type != Type)
+        continue;
+      auto ErrorOrContents = EF.getSectionContents(Sec);
+      if (!ErrorOrContents)
+        return ErrorOrContents.takeError();
+
+      auto Contents = ErrorOrContents.get();
+      if (Contents[0] != ELFAttrs::Format_Version || Contents.size() == 1)
+        return Error::success();
+
+      if (Error E = Attributes.parse(Contents, ELFT::TargetEndianness))
+        return E;
+      break;
     }
     return Error::success();
   }
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index ef1fac9..770ca87 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -929,12 +929,12 @@ private:
   /// Canonicalize path by removing ".", "..", "./", components. This is
   /// a VFS request, do not bother about symlinks in the path components
   /// but canonicalize in order to perform the correct entry search.
-  std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
+  std::error_code makeCanonicalForLookup(SmallVectorImpl<char> &Path) const;
 
   /// Get the File status, or error, from the underlying external file system.
   /// This returns the status with the originally requested name, while looking
-  /// up the entry using the canonical path.
-  ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
+  /// up the entry using a potentially different path.
+  ErrorOr<Status> getExternalStatus(const Twine &LookupPath,
                                     const Twine &OriginalPath) const;
 
   /// Make \a Path an absolute path.
@@ -1022,7 +1022,7 @@ private:
                  llvm::SmallVectorImpl<Entry *> &Entries) const;
 
   /// Get the status for a path with the provided \c LookupResult.
-  ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
+  ErrorOr<Status> status(const Twine &LookupPath, const Twine &OriginalPath,
                          const LookupResult &Result);
 
 public:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9f18a5b..6980cbd 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1090,12 +1090,6 @@ def mulo_by_0: GICombineRule<
          [{ return Helper.matchMulOBy0(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
 
-def addo_by_0: GICombineRule<
-  (defs root:$root, build_fn_matchinfo:$matchinfo),
-  (match (wip_match_opcode G_UADDO, G_SADDO):$root,
-         [{ return Helper.matchAddOBy0(*${root}, ${matchinfo}); }]),
-  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
-
 // Transform (uadde x, y, 0) -> (uaddo x, y)
 //           (sadde x, y, 0) -> (saddo x, y)
 //           (usube x, y, 0) -> (usubo x, y)
@@ -1291,6 +1285,12 @@ def match_ors : GICombineRule<
         [{ return Helper.matchOr(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
 
+def match_addos : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SADDO, G_UADDO):$root,
+        [{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 // Combines concat operations
 def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
 def combine_concat_vector : GICombineRule<
@@ -1326,7 +1326,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
 
 def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
                                      overlapping_and, mulo_by_2, mulo_by_0,
-                                     addo_by_0, adde_to_addo,
+                                     adde_to_addo,
                                      combine_minmax_nan]>;
 
 def known_bits_simplifications : GICombineGroup<[
@@ -1374,7 +1374,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 37df958..ceb371b 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -455,7 +455,7 @@ public:
 
   virtual Error buildCodeGenPipeline(ModulePassManager &, raw_pwrite_stream &,
                                      raw_pwrite_stream *, CodeGenFileType,
-                                     CGPassBuilderOption,
+                                     const CGPassBuilderOption &,
                                      PassInstrumentationCallbacks *) {
     return make_error<StringError>("buildCodeGenPipeline is not overridden",
                                    inconvertibleErrorCode());
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 069eb29..d8158eb 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -622,11 +622,22 @@ class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
 // Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position
 // `firstOpIdx` should be the same as the operand of `SecondMI` at position
 // `secondOpIdx`.
+// If the fusion has `IsCommutable` being true and the operand at `secondOpIdx`
+// has commutable operand, then the commutable operand will be checked too.
 class TieReg<int firstOpIdx, int secondOpIdx> : BothFusionPredicate {
   int FirstOpIdx = firstOpIdx;
   int SecondOpIdx = secondOpIdx;
 }
 
+// The operand of `SecondMI` at position `firstOpIdx` should be the same as the
+// operand at position `secondOpIdx`.
+// If the fusion has `IsCommutable` being true and the operand at `secondOpIdx`
+// has commutable operand, then the commutable operand will be checked too.
+class SameReg<int firstOpIdx, int secondOpIdx> : SecondFusionPredicate {
+  int FirstOpIdx = firstOpIdx;
+  int SecondOpIdx = secondOpIdx;
+}
+
 // A predicate for wildcard. The generated code will be like:
 // ```
 // if (!FirstMI)
@@ -655,9 +666,12 @@ def OneUse : OneUsePred;
 //   return true;
 // }
 // ```
+//
+// `IsCommutable` means whether we should handle commutable operands.
 class Fusion<string name, string fieldName, string desc, list<FusionPredicate> predicates>
   : SubtargetFeature<name, fieldName, "true", desc> {
   list<FusionPredicate> Predicates = predicates;
+  bit IsCommutable = 0;
 }
 
 // The generated predicator will be like:
@@ -671,6 +685,7 @@ class Fusion<string name, string fieldName, string desc, list<FusionPredicate> p
 //   /* Predicate for `SecondMI` */
 //   /* Wildcard */
 //   /* Predicate for `FirstMI` */
+//   /* Check same registers */
 //   /* Check One Use */
 //   /* Tie registers */
 //   /* Epilog */
@@ -688,11 +703,7 @@ class SimpleFusion<string name, string fieldName, string desc,
                 SecondFusionPredicateWithMCInstPredicate<secondPred>,
                 WildcardTrue,
                 FirstFusionPredicateWithMCInstPredicate<firstPred>,
-                SecondFusionPredicateWithMCInstPredicate<
-                  CheckAny<[
-                    CheckIsVRegOperand<0>,
-                    CheckSameRegOperand<0, 1>
-                  ]>>,
+                SameReg<0, 1>,
                 OneUse,
                 TieReg<0, 1>,
               ],
diff --git a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
index eb00e6c..df61f60 100644
--- a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
+++ b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
@@ -78,6 +78,7 @@ private:
 
 uint64_t getAllocaSizeInBytes(const AllocaInst &AI);
 void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Align);
+bool isLifetimeIntrinsic(Value *V);
 
 } // namespace memtag
 } // namespace llvm
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 8b7031e..6b2e88d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2534,12 +2534,73 @@ static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
   return nullptr;
 }
 
-static Constant *ConstantFoldScalarCall2(StringRef Name,
-                                         Intrinsic::ID IntrinsicID,
-                                         Type *Ty,
-                                         ArrayRef<Constant *> Operands,
-                                         const TargetLibraryInfo *TLI,
-                                         const CallBase *Call) {
+static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,
+                                      ArrayRef<Constant *> Operands,
+                                      const TargetLibraryInfo *TLI) {
+  if (!TLI)
+    return nullptr;
+
+  LibFunc Func = NotLibFunc;
+  if (!TLI->getLibFunc(Name, Func))
+    return nullptr;
+
+  const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]);
+  if (!Op1)
+    return nullptr;
+
+  const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]);
+  if (!Op2)
+    return nullptr;
+
+  const APFloat &Op1V = Op1->getValueAPF();
+  const APFloat &Op2V = Op2->getValueAPF();
+
+  switch (Func) {
+  default:
+    break;
+  case LibFunc_pow:
+  case LibFunc_powf:
+  case LibFunc_pow_finite:
+  case LibFunc_powf_finite:
+    if (TLI->has(Func))
+      return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+    break;
+  case LibFunc_fmod:
+  case LibFunc_fmodf:
+    if (TLI->has(Func)) {
+      APFloat V = Op1->getValueAPF();
+      if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
+        return ConstantFP::get(Ty->getContext(), V);
+    }
+    break;
+  case LibFunc_remainder:
+  case LibFunc_remainderf:
+    if (TLI->has(Func)) {
+      APFloat V = Op1->getValueAPF();
+      if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
+        return ConstantFP::get(Ty->getContext(), V);
+    }
+    break;
+  case LibFunc_atan2:
+  case LibFunc_atan2f:
+    // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
+    // (Solaris), so we do not assume a known result for that.
+    if (Op1V.isZero() && Op2V.isZero())
+      return nullptr;
+    [[fallthrough]];
+  case LibFunc_atan2_finite:
+  case LibFunc_atan2f_finite:
+    if (TLI->has(Func))
+      return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+    break;
+  }
+
+  return nullptr;
+}
+
+static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
+                                            ArrayRef<Constant *> Operands,
+                                            const CallBase *Call) {
   assert(Operands.size() == 2 && "Wrong number of operands.");
 
   if (Ty->isFloatingPointTy()) {
@@ -2569,7 +2630,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
         return nullptr;
       const APFloat &Op2V = Op2->getValueAPF();
 
-      if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+      if (const auto *ConstrIntr =
+              dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) {
         RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
         APFloat Res = Op1V;
         APFloat::opStatus St;
@@ -2632,52 +2694,6 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
         return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
       }
 
-      if (!TLI)
-        return nullptr;
-
-      LibFunc Func = NotLibFunc;
-      if (!TLI->getLibFunc(Name, Func))
-        return nullptr;
-
-      switch (Func) {
-      default:
-        break;
-      case LibFunc_pow:
-      case LibFunc_powf:
-      case LibFunc_pow_finite:
-      case LibFunc_powf_finite:
-        if (TLI->has(Func))
-          return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
-        break;
-      case LibFunc_fmod:
-      case LibFunc_fmodf:
-        if (TLI->has(Func)) {
-          APFloat V = Op1->getValueAPF();
-          if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
-            return ConstantFP::get(Ty->getContext(), V);
-        }
-        break;
-      case LibFunc_remainder:
-      case LibFunc_remainderf:
-        if (TLI->has(Func)) {
-          APFloat V = Op1->getValueAPF();
-          if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
-            return ConstantFP::get(Ty->getContext(), V);
-        }
-        break;
-      case LibFunc_atan2:
-      case LibFunc_atan2f:
-        // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
-        // (Solaris), so we do not assume a known result for that.
-        if (Op1V.isZero() && Op2V.isZero())
-          return nullptr;
-        [[fallthrough]];
-      case LibFunc_atan2_finite:
-      case LibFunc_atan2f_finite:
-        if (TLI->has(Func))
-          return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
-        break;
-      }
     } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
       switch (IntrinsicID) {
       case Intrinsic::ldexp: {
@@ -3168,8 +3184,13 @@ static Constant *ConstantFoldScalarCall(StringRef Name,
   if (Operands.size() == 1)
     return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
 
-  if (Operands.size() == 2)
-    return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);
+  if (Operands.size() == 2) {
+    if (Constant *FoldedLibCall =
+            ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
+      return FoldedLibCall;
+    }
+    return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
+  }
 
   if (Operands.size() == 3)
     return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
@@ -3376,6 +3397,13 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
 
 } // end anonymous namespace
 
+Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
+                                            Constant *RHS, Type *Ty,
+                                            Instruction *FMFSource) {
+  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS},
+                                    dyn_cast_if_present<CallBase>(FMFSource));
+}
+
 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                  ArrayRef<Constant *> Operands,
                                  const TargetLibraryInfo *TLI) {
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index ce65178..d14897a 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6384,11 +6384,10 @@ static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
   return nullptr;
 }
 
-static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
-                                      const SimplifyQuery &Q,
-                                      const CallBase *Call) {
-  Intrinsic::ID IID = F->getIntrinsicID();
-  Type *ReturnType = F->getReturnType();
+Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
+                                     Value *Op0, Value *Op1,
+                                     const SimplifyQuery &Q,
+                                     const CallBase *Call) {
   unsigned BitWidth = ReturnType->getScalarSizeInBits();
   switch (IID) {
   case Intrinsic::abs:
@@ -6646,19 +6645,21 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
     // float, if the ninf flag is set.
     const APFloat *C;
     if (match(Op1, m_APFloat(C)) &&
-        (C->isInfinity() || (Call->hasNoInfs() && C->isLargest()))) {
+        (C->isInfinity() || (Call && Call->hasNoInfs() && C->isLargest()))) {
       // minnum(X, -inf) -> -inf
       // maxnum(X, +inf) -> +inf
       // minimum(X, -inf) -> -inf if nnan
       // maximum(X, +inf) -> +inf if nnan
-      if (C->isNegative() == IsMin && (!PropagateNaN || Call->hasNoNaNs()))
+      if (C->isNegative() == IsMin &&
+          (!PropagateNaN || (Call && Call->hasNoNaNs())))
         return ConstantFP::get(ReturnType, *C);
 
       // minnum(X, +inf) -> X if nnan
       // maxnum(X, -inf) -> X if nnan
       // minimum(X, +inf) -> X
       // maximum(X, -inf) -> X
-      if (C->isNegative() != IsMin && (PropagateNaN || Call->hasNoNaNs()))
+      if (C->isNegative() != IsMin &&
+          (PropagateNaN || (Call && Call->hasNoNaNs())))
         return Op0;
     }
 
@@ -6672,8 +6673,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
     break;
   }
   case Intrinsic::vector_extract: {
-    Type *ReturnType = F->getReturnType();
-
     // (extract_vector (insert_vector _, X, 0), 0) -> X
     unsigned IdxN = cast<ConstantInt>(Op1)->getZExtValue();
     Value *X = nullptr;
@@ -6720,7 +6719,8 @@ static Value *simplifyIntrinsic(CallBase *Call, Value *Callee,
     return simplifyUnaryIntrinsic(F, Args[0], Q, Call);
 
   if (NumOperands == 2)
-    return simplifyBinaryIntrinsic(F, Args[0], Args[1], Q, Call);
+    return simplifyBinaryIntrinsic(IID, F->getReturnType(), Args[0], Args[1], Q,
+                                   Call);
 
   // Handle intrinsics with 3 or more arguments.
   switch (IID) {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 15311be..2e0bd84 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -9,6 +9,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfoImpl.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Dominators.h"
@@ -874,7 +875,22 @@ TargetTransformInfo::getOperandInfo(const Value *V) {
 InstructionCost TargetTransformInfo::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
     OperandValueInfo Op1Info, OperandValueInfo Op2Info,
-    ArrayRef<const Value *> Args, const Instruction *CxtI) const {
+    ArrayRef<const Value *> Args, const Instruction *CxtI,
+    const TargetLibraryInfo *TLibInfo) const {
+
+  // Use call cost for frem intructions that have platform specific vector math
+  // functions, as those will be replaced with calls later by SelectionDAG or
+  // ReplaceWithVecLib pass.
+  if (TLibInfo && Opcode == Instruction::FRem) {
+    VectorType *VecTy = dyn_cast<VectorType>(Ty);
+    LibFunc Func;
+    if (VecTy &&
+        TLibInfo->getLibFunc(Instruction::FRem, Ty->getScalarType(), Func) &&
+        TLibInfo->isFunctionVectorizable(TLibInfo->getName(Func),
+                                         VecTy->getElementCount()))
+      return getCallInstrCost(nullptr, VecTy, {VecTy, VecTy}, CostKind);
+  }
+
   InstructionCost Cost =
       TTIImpl->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                       Op1Info, Op2Info,
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 7005011..c085c71 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -270,6 +270,11 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
       STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
       STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_DECLARE)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_VALUE)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_ASSIGN)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_VALUE_SIMPLE)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_LABEL)
     }
   case bitc::VALUE_SYMTAB_BLOCK_ID:
     switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 9c63116..d284c98 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -100,9 +100,6 @@ static cl::opt<bool> ExpandConstantExprs(
     cl::desc(
         "Expand constant expressions to instructions for testing purposes"));
 
-// Declare external flag for whether we're using the new debug-info format.
-extern llvm::cl::opt<bool> UseNewDbgInfoFormat;
-
 namespace {
 
 enum {
@@ -4279,6 +4276,10 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
 Error BitcodeReader::parseModule(uint64_t ResumeBit,
                                  bool ShouldLazyLoadMetadata,
                                  ParserCallbacks Callbacks) {
+  // Force the debug-info mode into the old format for now.
+  // FIXME: Remove this once all tools support RemoveDIs.
+  TheModule->IsNewDbgInfoFormat = false;
+
   this->ValueTypeCallback = std::move(Callbacks.ValueType);
   if (ResumeBit) {
     if (Error JumpFailed = Stream.JumpToBit(ResumeBit))
@@ -6398,6 +6399,89 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       InstructionList.push_back(I);
       break;
     }
+    case bitc::FUNC_CODE_DEBUG_RECORD_LABEL: {
+      // DPLabels are placed after the Instructions that they are attached to.
+      Instruction *Inst = getLastInstruction();
+      if (!Inst)
+        return error("Invalid dbg record: missing instruction");
+      DILocation *DIL = cast<DILocation>(getFnMetadataByID(Record[0]));
+      DILabel *Label = cast<DILabel>(getFnMetadataByID(Record[1]));
+      Inst->getParent()->insertDbgRecordBefore(
+          new DPLabel(Label, DebugLoc(DIL)), Inst->getIterator());
+      continue; // This isn't an instruction.
+    }
+    case bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE:
+    case bitc::FUNC_CODE_DEBUG_RECORD_VALUE:
+    case bitc::FUNC_CODE_DEBUG_RECORD_DECLARE:
+    case bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN: {
+      // DPValues are placed after the Instructions that they are attached to.
+      Instruction *Inst = getLastInstruction();
+      if (!Inst)
+        return error("Invalid dbg record: missing instruction");
+
+      // First 3 fields are common to all kinds:
+      //   DILocation, DILocalVariable, DIExpression
+      // dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE)
+      //   ..., LocationMetadata
+      // dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE - abbrev'd)
+      //   ..., Value
+      // dbg_declare (FUNC_CODE_DEBUG_RECORD_DECLARE)
+      //   ..., LocationMetadata
+      // dbg_assign (FUNC_CODE_DEBUG_RECORD_ASSIGN)
+      //   ..., LocationMetadata, DIAssignID, DIExpression, LocationMetadata
+      unsigned Slot = 0;
+      // Common fields (0-2).
+      DILocation *DIL = cast<DILocation>(getFnMetadataByID(Record[Slot++]));
+      DILocalVariable *Var =
+          cast<DILocalVariable>(getFnMetadataByID(Record[Slot++]));
+      DIExpression *Expr =
+          cast<DIExpression>(getFnMetadataByID(Record[Slot++]));
+
+      // Union field (3: LocationMetadata | Value).
+      Metadata *RawLocation = nullptr;
+      if (BitCode == bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE) {
+        Value *V = nullptr;
+        unsigned TyID = 0;
+        // We never expect to see a fwd reference value here because
+        // use-before-defs are encoded with the standard non-abbrev record
+        // type (they'd require encoding the type too, and they're rare). As a
+        // result, getValueTypePair only ever increments Slot by one here (once
+        // for the value, never twice for value and type).
+        unsigned SlotBefore = Slot;
+        if (getValueTypePair(Record, Slot, NextValueNo, V, TyID, CurBB))
+          return error("Invalid dbg record: invalid value");
+        (void)SlotBefore;
+        assert((SlotBefore == Slot - 1) && "unexpected fwd ref");
+        RawLocation = ValueAsMetadata::get(V);
+      } else {
+        RawLocation = getFnMetadataByID(Record[Slot++]);
+      }
+
+      DPValue *DPV = nullptr;
+      switch (BitCode) {
+      case bitc::FUNC_CODE_DEBUG_RECORD_VALUE:
+      case bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE:
+        DPV = new DPValue(RawLocation, Var, Expr, DIL,
+                          DPValue::LocationType::Value);
+        break;
+      case bitc::FUNC_CODE_DEBUG_RECORD_DECLARE:
+        DPV = new DPValue(RawLocation, Var, Expr, DIL,
+                          DPValue::LocationType::Declare);
+        break;
+      case bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN: {
+        DIAssignID *ID = cast<DIAssignID>(getFnMetadataByID(Record[Slot++]));
+        DIExpression *AddrExpr =
+            cast<DIExpression>(getFnMetadataByID(Record[Slot++]));
+        Metadata *Addr = getFnMetadataByID(Record[Slot++]);
+        DPV = new DPValue(RawLocation, Var, Expr, ID, Addr, AddrExpr, DIL);
+        break;
+      }
+      default:
+        llvm_unreachable("Unknown DPValue bitcode");
+      }
+      Inst->getParent()->insertDbgRecordBefore(DPV, Inst->getIterator());
+      continue; // This isn't an instruction.
+    }
     case bitc::FUNC_CODE_INST_CALL: {
       // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...]
       if (Record.size() < 3)
@@ -6677,10 +6761,21 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
   // Move the bit stream to the saved position of the deferred function body.
   if (Error JumpFailed = Stream.JumpToBit(DFII->second))
     return JumpFailed;
+
+  // Set the debug info mode to "new", forcing a mismatch between
+  // module and function debug modes. This is okay because we'll convert
+  // everything back to the old mode after parsing.
+  // FIXME: Remove this once all tools support RemoveDIs.
+  F->IsNewDbgInfoFormat = true;
+
   if (Error Err = parseFunctionBody(F))
     return Err;
   F->setIsMaterializable(false);
 
+  // Convert new debug info records into intrinsics.
+  // FIXME: Remove this once all tools support RemoveDIs.
+  F->convertFromNewDbgValues();
+
   if (StripDebugInfo)
     stripDebugInfo(*F);
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 597f493..6f0879a 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -99,6 +99,9 @@ namespace llvm {
 extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
 }
 
+extern bool WriteNewDbgInfoFormatToBitcode;
+extern llvm::cl::opt<bool> UseNewDbgInfoFormat;
+
 namespace {
 
 /// These are manifest constants used by the bitcode writer. They do not need to
@@ -128,6 +131,7 @@ enum {
   FUNCTION_INST_RET_VAL_ABBREV,
   FUNCTION_INST_UNREACHABLE_ABBREV,
   FUNCTION_INST_GEP_ABBREV,
+  FUNCTION_DEBUG_RECORD_VALUE_ABBREV,
 };
 
 /// Abstract class to manage the bitcode writing, subclassed for each bitcode
@@ -3512,25 +3516,95 @@ void ModuleBitcodeWriter::writeFunction(
       NeedsMetadataAttachment |= I.hasMetadataOtherThanDebugLoc();
 
       // If the instruction has a debug location, emit it.
-      DILocation *DL = I.getDebugLoc();
-      if (!DL)
-        continue;
-
-      if (DL == LastDL) {
-        // Just repeat the same debug loc as last time.
-        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
-        continue;
+      if (DILocation *DL = I.getDebugLoc()) {
+        if (DL == LastDL) {
+          // Just repeat the same debug loc as last time.
+          Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+        } else {
+          Vals.push_back(DL->getLine());
+          Vals.push_back(DL->getColumn());
+          Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
+          Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+          Vals.push_back(DL->isImplicitCode());
+          Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+          Vals.clear();
+          LastDL = DL;
+        }
       }
 
-      Vals.push_back(DL->getLine());
-      Vals.push_back(DL->getColumn());
-      Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
-      Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
-      Vals.push_back(DL->isImplicitCode());
-      Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
-      Vals.clear();
-
-      LastDL = DL;
+      // If the instruction has DbgRecords attached to it, emit them. Note that
+      // they come after the instruction so that it's easy to attach them again
+      // when reading the bitcode, even though conceptually the debug locations
+      // start "before" the instruction.
+      if (I.hasDbgRecords() && WriteNewDbgInfoFormatToBitcode) {
+        /// Try to push the value only (unwrapped), otherwise push the
+        /// metadata wrapped value. Returns true if the value was pushed
+        /// without the ValueAsMetadata wrapper.
+        auto PushValueOrMetadata = [&Vals, InstID,
+                                    this](Metadata *RawLocation) {
+          assert(RawLocation && "RawLocation unexpectedly null in DPValue");
+          if (ValueAsMetadata *VAM = dyn_cast<ValueAsMetadata>(RawLocation)) {
+            SmallVector<unsigned, 2> ValAndType;
+            // If the value is a fwd-ref the type is also pushed. We don't
+            // want the type, so fwd-refs are kept wrapped (pushValueAndType
+            // returns false if the value is pushed without type).
+            if (!pushValueAndType(VAM->getValue(), InstID, ValAndType)) {
+              Vals.push_back(ValAndType[0]);
+              return true;
+            }
+          }
+          // The metadata is a DIArgList, or ValueAsMetadata wrapping a
+          // fwd-ref. Push the metadata ID.
+          Vals.push_back(VE.getMetadataID(RawLocation));
+          return false;
+        };
+
+        // Write out non-instruction debug information attached to this
+        // instruction. Write it after the instruction so that it's easy to
+        // re-attach to the instruction reading the records in.
+        for (DbgRecord &DR : I.DbgMarker->getDbgRecordRange()) {
+          if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
+            Vals.push_back(VE.getMetadataID(&*DPL->getDebugLoc()));
+            Vals.push_back(VE.getMetadataID(DPL->getLabel()));
+            Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_LABEL, Vals);
+            Vals.clear();
+            continue;
+          }
+
+          // First 3 fields are common to all kinds:
+          //   DILocation, DILocalVariable, DIExpression
+          // dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE)
+          //   ..., LocationMetadata
+          // dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE - abbrev'd)
+          //   ..., Value
+          // dbg_declare (FUNC_CODE_DEBUG_RECORD_DECLARE)
+          //   ..., LocationMetadata
+          // dbg_assign (FUNC_CODE_DEBUG_RECORD_ASSIGN)
+          //   ..., LocationMetadata, DIAssignID, DIExpression, LocationMetadata
+          DPValue &DPV = cast<DPValue>(DR);
+          Vals.push_back(VE.getMetadataID(&*DPV.getDebugLoc()));
+          Vals.push_back(VE.getMetadataID(DPV.getVariable()));
+          Vals.push_back(VE.getMetadataID(DPV.getExpression()));
+          if (DPV.isDbgValue()) {
+            if (PushValueOrMetadata(DPV.getRawLocation()))
+              Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE, Vals,
+                                FUNCTION_DEBUG_RECORD_VALUE_ABBREV);
+            else
+              Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_VALUE, Vals);
+          } else if (DPV.isDbgDeclare()) {
+            Vals.push_back(VE.getMetadataID(DPV.getRawLocation()));
+            Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_DECLARE, Vals);
+          } else {
+            assert(DPV.isDbgAssign() && "Unexpected DbgRecord kind");
+            Vals.push_back(VE.getMetadataID(DPV.getRawLocation()));
+            Vals.push_back(VE.getMetadataID(DPV.getAssignID()));
+            Vals.push_back(VE.getMetadataID(DPV.getAddressExpression()));
+            Vals.push_back(VE.getMetadataID(DPV.getRawAddress()));
+            Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN, Vals);
+          }
+          Vals.clear();
+        }
+      }
     }
 
     if (BlockAddress *BA = BlockAddress::lookup(&BB)) {
@@ -3771,7 +3845,17 @@ void ModuleBitcodeWriter::writeBlockInfo() {
         FUNCTION_INST_GEP_ABBREV)
       llvm_unreachable("Unexpected abbrev ordering!");
   }
-
+  {
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // dbgloc
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // var
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // expr
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // val
+    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+        FUNCTION_DEBUG_RECORD_VALUE_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering! 1");
+  }
   Stream.ExitBlock();
 }
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 0eb9c24..de2396f 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -18,11 +18,12 @@
 #include "llvm/Pass.h"
 using namespace llvm;
 
+extern bool WriteNewDbgInfoFormatToBitcode;
+
 PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
-  // RemoveDIs: there's no bitcode representation of the DbgRecord debug-info,
-  // convert to dbg.values before writing out.
-  bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
-  if (IsNewDbgInfoFormat)
+  bool ConvertToOldDbgFormatForWrite =
+      M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode;
+  if (ConvertToOldDbgFormatForWrite)
     M.convertFromNewDbgValues();
 
   const ModuleSummaryIndex *Index =
@@ -30,7 +31,7 @@ PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
                        : nullptr;
   WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, Index, EmitModuleHash);
 
-  if (IsNewDbgInfoFormat)
+  if (ConvertToOldDbgFormatForWrite)
     M.convertToNewDbgValues();
 
   return PreservedAnalyses::all();
@@ -56,16 +57,15 @@ namespace {
     StringRef getPassName() const override { return "Bitcode Writer"; }
 
     bool runOnModule(Module &M) override {
-      // RemoveDIs: there's no bitcode representation of the DbgRecord
-      // debug-info, convert to dbg.values before writing out.
-      bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
-      if (IsNewDbgInfoFormat)
+      bool ConvertToOldDbgFormatForWrite =
+          M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode;
+      if (ConvertToOldDbgFormatForWrite)
         M.convertFromNewDbgValues();
 
       WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, /*Index=*/nullptr,
                          /*EmitModuleHash=*/false);
 
-      if (IsNewDbgInfoFormat)
+      if (ConvertToOldDbgFormatForWrite)
         M.convertToNewDbgValues();
       return false;
     }
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index fccb2a6..1c439c9 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -134,20 +134,28 @@ static OrderMap orderModule(const Module &M) {
     // Metadata used by instructions is decoded before the actual instructions,
     // so visit any constants used by it beforehand.
     for (const BasicBlock &BB : F)
-      for (const Instruction &I : BB)
-        for (const Value *V : I.operands()) {
-          if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
-            if (const auto *VAM =
-                    dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
+      for (const Instruction &I : BB) {
+        auto OrderConstantFromMetadata = [&](Metadata *MD) {
+          if (const auto *VAM = dyn_cast<ValueAsMetadata>(MD)) {
+            orderConstantValue(VAM->getValue());
+          } else if (const auto *AL = dyn_cast<DIArgList>(MD)) {
+            for (const auto *VAM : AL->getArgs())
               orderConstantValue(VAM->getValue());
-            } else if (const auto *AL =
-                           dyn_cast<DIArgList>(MAV->getMetadata())) {
-              for (const auto *VAM : AL->getArgs())
-                orderConstantValue(VAM->getValue());
-            }
           }
+        };
+
+        for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
+          OrderConstantFromMetadata(DPV.getRawLocation());
+          if (DPV.isDbgAssign())
+            OrderConstantFromMetadata(DPV.getRawAddress());
         }
 
+        for (const Value *V : I.operands()) {
+          if (const auto *MAV = dyn_cast<MetadataAsValue>(V))
+            OrderConstantFromMetadata(MAV->getMetadata());
+        }
+      }
+
     for (const Argument &A : F.args())
       orderValue(&A, OM);
     for (const BasicBlock &BB : F)
@@ -261,33 +269,39 @@ static UseListOrderStack predictUseListOrder(const Module &M) {
   // constants in the last Function they're used in.  Module-level constants
   // have already been visited above.
   for (const Function &F : llvm::reverse(M)) {
+    auto PredictValueOrderFromMetadata = [&](Metadata *MD) {
+      if (const auto *VAM = dyn_cast<ValueAsMetadata>(MD)) {
+        predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
+      } else if (const auto *AL = dyn_cast<DIArgList>(MD)) {
+        for (const auto *VAM : AL->getArgs())
+          predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
+      }
+    };
     if (F.isDeclaration())
       continue;
     for (const BasicBlock &BB : F)
       predictValueUseListOrder(&BB, &F, OM, Stack);
     for (const Argument &A : F.args())
       predictValueUseListOrder(&A, &F, OM, Stack);
-    for (const BasicBlock &BB : F)
+    for (const BasicBlock &BB : F) {
       for (const Instruction &I : BB) {
+        for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
+          PredictValueOrderFromMetadata(DPV.getRawLocation());
+          if (DPV.isDbgAssign())
+            PredictValueOrderFromMetadata(DPV.getRawAddress());
+        }
         for (const Value *Op : I.operands()) {
           if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
             predictValueUseListOrder(Op, &F, OM, Stack);
-          if (const auto *MAV = dyn_cast<MetadataAsValue>(Op)) {
-            if (const auto *VAM =
-                    dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
-              predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
-            } else if (const auto *AL =
-                           dyn_cast<DIArgList>(MAV->getMetadata())) {
-              for (const auto *VAM : AL->getArgs())
-                predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
-            }
-          }
+          if (const auto *MAV = dyn_cast<MetadataAsValue>(Op))
+            PredictValueOrderFromMetadata(MAV->getMetadata());
         }
         if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
           predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
                                    Stack);
         predictValueUseListOrder(&I, &F, OM, Stack);
       }
+    }
   }
 
   // Visit globals last, since the module-level use-list block will be seen
@@ -409,6 +423,41 @@ ValueEnumerator::ValueEnumerator(const Module &M,
 
     for (const BasicBlock &BB : F)
       for (const Instruction &I : BB) {
+        // Local metadata is enumerated during function-incorporation, but
+        // any ConstantAsMetadata arguments in a DIArgList should be examined
+        // now.
+        auto EnumerateNonLocalValuesFromMetadata = [&](Metadata *MD) {
+          assert(MD && "Metadata unexpectedly null");
+          if (const auto *AL = dyn_cast<DIArgList>(MD)) {
+            for (const auto *VAM : AL->getArgs()) {
+              if (isa<ConstantAsMetadata>(VAM))
+                EnumerateMetadata(&F, VAM);
+            }
+            return;
+          }
+
+          if (!isa<LocalAsMetadata>(MD))
+            EnumerateMetadata(&F, MD);
+        };
+
+        for (DbgRecord &DR : I.getDbgRecordRange()) {
+          if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
+            EnumerateMetadata(&F, DPL->getLabel());
+            EnumerateMetadata(&F, &*DPL->getDebugLoc());
+            continue;
+          }
+          // Enumerate non-local location metadata.
+          DPValue &DPV = cast<DPValue>(DR);
+          EnumerateNonLocalValuesFromMetadata(DPV.getRawLocation());
+          EnumerateMetadata(&F, DPV.getExpression());
+          EnumerateMetadata(&F, DPV.getVariable());
+          EnumerateMetadata(&F, &*DPV.getDebugLoc());
+          if (DPV.isDbgAssign()) {
+            EnumerateNonLocalValuesFromMetadata(DPV.getRawAddress());
+            EnumerateMetadata(&F, DPV.getAssignID());
+            EnumerateMetadata(&F, DPV.getAddressExpression());
+          }
+        }
         for (const Use &Op : I.operands()) {
           auto *MD = dyn_cast<MetadataAsValue>(&Op);
           if (!MD) {
@@ -416,19 +465,7 @@ ValueEnumerator::ValueEnumerator(const Module &M,
             continue;
           }
 
-          // Local metadata is enumerated during function-incorporation, but
-          // any ConstantAsMetadata arguments in a DIArgList should be examined
-          // now.
-          if (isa<LocalAsMetadata>(MD->getMetadata()))
-            continue;
-          if (auto *AL = dyn_cast<DIArgList>(MD->getMetadata())) {
-            for (auto *VAM : AL->getArgs())
-              if (isa<ConstantAsMetadata>(VAM))
-                EnumerateMetadata(&F, VAM);
-            continue;
-          }
-
-          EnumerateMetadata(&F, MD->getMetadata());
+          EnumerateNonLocalValuesFromMetadata(MD->getMetadata());
         }
         if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
           EnumerateType(SVI->getShuffleMaskForBitcode()->getType());
@@ -1064,27 +1101,41 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
 
   SmallVector<LocalAsMetadata *, 8> FnLocalMDVector;
   SmallVector<DIArgList *, 8> ArgListMDVector;
+
+  auto AddFnLocalMetadata = [&](Metadata *MD) {
+    if (!MD)
+      return;
+    if (auto *Local = dyn_cast<LocalAsMetadata>(MD)) {
+      // Enumerate metadata after the instructions they might refer to.
+      FnLocalMDVector.push_back(Local);
+    } else if (auto *ArgList = dyn_cast<DIArgList>(MD)) {
+      ArgListMDVector.push_back(ArgList);
+      for (ValueAsMetadata *VMD : ArgList->getArgs()) {
+        if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) {
+          // Enumerate metadata after the instructions they might refer
+          // to.
+          FnLocalMDVector.push_back(Local);
+        }
+      }
+    }
+  };
+
   // Add all of the instructions.
   for (const BasicBlock &BB : F) {
     for (const Instruction &I : BB) {
       for (const Use &OI : I.operands()) {
-        if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) {
-          if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) {
-            // Enumerate metadata after the instructions they might refer to.
-            FnLocalMDVector.push_back(Local);
-          } else if (auto *ArgList = dyn_cast<DIArgList>(MD->getMetadata())) {
-            ArgListMDVector.push_back(ArgList);
-            for (ValueAsMetadata *VMD : ArgList->getArgs()) {
-              if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) {
-                // Enumerate metadata after the instructions they might refer
-                // to.
-                FnLocalMDVector.push_back(Local);
-              }
-            }
-          }
+        if (auto *MD = dyn_cast<MetadataAsValue>(&OI))
+          AddFnLocalMetadata(MD->getMetadata());
+      }
+      /// RemoveDIs: Add non-instruction function-local metadata uses.
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
+        assert(DPV.getRawLocation() && "DPValue location unexpectedly null");
+        AddFnLocalMetadata(DPV.getRawLocation());
+        if (DPV.isDbgAssign()) {
+          assert(DPV.getRawAddress() && "DPValue location unexpectedly null");
+          AddFnLocalMetadata(DPV.getRawAddress());
         }
       }
-
       if (!I.getType()->isVoidTy())
         EnumerateValue(&I);
     }
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0efe7a0..a155387 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -538,8 +538,10 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (!M.getModuleInlineAsm().empty()) {
     OutStreamer->AddComment("Start of file scope inline assembly");
     OutStreamer->addBlankLine();
-    emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
-                  TM.Options.MCOptions);
+    emitInlineAsm(
+        M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+        TM.Options.MCOptions, nullptr,
+        InlineAsm::AsmDialect(TM.getMCAsmInfo()->getAssemblerDialect()));
     OutStreamer->AddComment("End of file scope inline assembly");
     OutStreamer->addBlankLine();
   }
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 746926e56..52774c7 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -226,7 +226,7 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) {
     // Any VarLocInfos attached to a DbgRecord should now be remapped to their
     // marker Instruction, in order of DbgRecord appearance and prior to any
     // VarLocInfos attached directly to that instruction.
-    for (const DPValue &DPV : DPValue::filter(I->getDbgRecordRange())) {
+    for (const DPValue &DPV : filterDbgVars(I->getDbgRecordRange())) {
       // Even though DPV defines a variable location, VarLocsBeforeInst can
       // still be empty if that VarLoc was redundant.
       if (!Builder.VarLocsBeforeInst.count(&DPV))
@@ -830,7 +830,7 @@ class MemLocFragmentFill {
   void process(BasicBlock &BB, VarFragMap &LiveSet) {
     BBInsertBeforeMap[&BB].clear();
     for (auto &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
         if (const auto *Locs = FnVarLocs->getWedge(&DPV)) {
           for (const VarLocInfo &Loc : *Locs) {
             addDef(Loc, &DPV, *I.getParent(), LiveSet);
@@ -1919,7 +1919,7 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
       // Skip over non-variable debug records (i.e., labels). They're going to
       // be read from IR (possibly re-ordering them within the debug record
       // range) rather than from the analysis results.
-      for (DPValue &DPV : DPValue::filter(II->getDbgRecordRange())) {
+      for (DPValue &DPV : filterDbgVars(II->getDbgRecordRange())) {
         resetInsertionPoint(DPV);
         processDPValue(DPV, LiveSet);
         assert(LiveSet->isValid());
@@ -2176,7 +2176,7 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
   };
   for (auto &BB : Fn) {
     for (auto &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
         ProcessDbgRecord(&DPV, DPDeclares);
       if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
         ProcessDbgRecord(DII, InstDeclares);
@@ -2466,7 +2466,7 @@ bool AssignmentTrackingLowering::emitPromotedVarLocs(
   for (auto &BB : Fn) {
     for (auto &I : BB) {
       // Skip instructions other than dbg.values and dbg.assigns.
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
         if (DPV.isDbgValue() || DPV.isDbgAssign())
           TranslateDbgRecord(&DPV);
       auto *DVI = dyn_cast<DbgValueInst>(&I);
@@ -2568,7 +2568,7 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
       }
     };
     HandleLocsForWedge(&I);
-    for (DPValue &DPV : reverse(DPValue::filter(I.getDbgRecordRange())))
+    for (DPValue &DPV : reverse(filterDbgVars(I.getDbgRecordRange())))
       HandleLocsForWedge(&DPV);
   }
 
@@ -2633,7 +2633,7 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
       }
     };
 
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
       HandleLocsForWedge(&DPV);
     HandleLocsForWedge(&I);
   }
@@ -2719,7 +2719,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
         Changed = true;
       }
     };
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
       HandleLocsForWedge(&DPV);
     HandleLocsForWedge(&I);
   }
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 055e275..bece70a 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8506,7 +8506,7 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
 
 bool CodeGenPrepare::fixupDPValuesOnInst(Instruction &I) {
   bool AnyChange = false;
-  for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+  for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
     AnyChange |= fixupDPValue(DPV);
   return AnyChange;
 }
@@ -8620,7 +8620,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
       // If this isn't a dbg.value, process any attached DPValue records
       // attached to this instruction.
       for (DPValue &DPV : llvm::make_early_inc_range(
-               DPValue::filter(Insn.getDbgRecordRange()))) {
+               filterDbgVars(Insn.getDbgRecordRange()))) {
         if (DPV.Type != DPValue::LocationType::Value)
           continue;
         DbgProcessor(&DPV, &Insn);
diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
index 78ad2a2..308f13c 100644
--- a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
+++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -375,7 +375,7 @@ static void expandIToFP(Instruction *IToFP) {
   Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
                                   FloatWidth == 128 ? Call : Cast);
   Value *Cmp3 = Builder.CreateICmpSGT(
-      Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
+      Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
   Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
 
   // if.then4:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2e706b4..bee49db 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4936,24 +4936,6 @@ bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
   return true;
 }
 
-bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
-  // (G_*ADDO x, 0) -> x + no carry out
-  assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
-         MI.getOpcode() == TargetOpcode::G_SADDO);
-  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
-    return false;
-  Register Carry = MI.getOperand(1).getReg();
-  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
-    return false;
-  Register Dst = MI.getOperand(0).getReg();
-  Register LHS = MI.getOperand(2).getReg();
-  MatchInfo = [=](MachineIRBuilder &B) {
-    B.buildCopy(Dst, LHS);
-    B.buildConstant(Carry, 0);
-  };
-  return true;
-}
-
 bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
   // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
   // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
@@ -6354,6 +6336,26 @@ CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
   return Value;
 }
 
+// FIXME G_SPLAT_VECTOR
+bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
+  auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+  if (IConstant)
+    return true;
+
+  GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
+  if (!BuildVector)
+    return false;
+
+  unsigned NumSources = BuildVector->getNumSources();
+  for (unsigned I = 0; I < NumSources; ++I) {
+    std::optional<ValueAndVReg> IConstant =
+        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
+    if (!IConstant)
+      return false;
+  }
+  return true;
+}
+
 // TODO: use knownbits to determine zeros
 bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
                                               BuildFnTy &MatchInfo) {
@@ -6928,3 +6930,178 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
 
   return false;
 }
+
+bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
+
+  // Addo has no flags
+  Register Dst = Add->getReg(0);
+  Register Carry = Add->getReg(1);
+  Register LHS = Add->getLHSReg();
+  Register RHS = Add->getRHSReg();
+  bool IsSigned = Add->isSigned();
+  LLT DstTy = MRI.getType(Dst);
+  LLT CarryTy = MRI.getType(Carry);
+
+  // We want do fold the [u|s]addo.
+  if (!MRI.hasOneNonDBGUse(Dst))
+    return false;
+
+  // Fold addo, if the carry is dead -> add, undef.
+  if (MRI.use_nodbg_empty(Carry) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildAdd(Dst, LHS, RHS);
+      B.buildUndef(Carry);
+    };
+    return true;
+  }
+
+  // We want do fold the [u|s]addo.
+  if (!MRI.hasOneNonDBGUse(Carry))
+    return false;
+
+  // Canonicalize constant to RHS.
+  if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
+    if (IsSigned) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildSAddo(Dst, Carry, RHS, LHS);
+      };
+      return true;
+    }
+    // !IsSigned
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildUAddo(Dst, Carry, RHS, LHS);
+    };
+    return true;
+  }
+
+  std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
+  std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
+
+  // Fold addo(c1, c2) -> c3, carry.
+  if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
+      isConstantLegalOrBeforeLegalizer(CarryTy)) {
+    bool Overflow;
+    APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
+                            : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildConstant(Dst, Result);
+      B.buildConstant(Carry, Overflow);
+    };
+    return true;
+  }
+
+  // Fold (addo x, 0) -> x, no borrow
+  if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildCopy(Dst, LHS);
+      B.buildConstant(Carry, 0);
+    };
+    return true;
+  }
+
+  // Given 2 constant operands whose sum does not overflow:
+  // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
+  // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
+  GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
+  if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
+      ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
+       (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
+    std::optional<APInt> MaybeAddRHS =
+        getConstantOrConstantSplatVector(AddLHS->getRHSReg());
+    if (MaybeAddRHS) {
+      bool Overflow;
+      APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
+                            : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
+      if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
+        if (IsSigned) {
+          MatchInfo = [=](MachineIRBuilder &B) {
+            auto ConstRHS = B.buildConstant(DstTy, NewC);
+            B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
+          };
+          return true;
+        }
+        // !IsSigned
+        MatchInfo = [=](MachineIRBuilder &B) {
+          auto ConstRHS = B.buildConstant(DstTy, NewC);
+          B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
+        };
+        return true;
+      }
+    }
+  };
+
+  // We try to combine addo to non-overflowing add.
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
+      !isConstantLegalOrBeforeLegalizer(CarryTy))
+    return false;
+
+  // We try to combine uaddo to non-overflowing add.
+  if (!IsSigned) {
+    ConstantRange CRLHS =
+        ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
+    ConstantRange CRRHS =
+        ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
+
+    switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
+    case ConstantRange::OverflowResult::MayOverflow:
+      return false;
+    case ConstantRange::OverflowResult::NeverOverflows: {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
+        B.buildConstant(Carry, 0);
+      };
+      return true;
+    }
+    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+    case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildAdd(Dst, LHS, RHS);
+        B.buildConstant(Carry, 1);
+      };
+      return true;
+    }
+    }
+    return false;
+  }
+
+  // We try to combine saddo to non-overflowing add.
+
+  // If LHS and RHS each have at least two sign bits, then there is no signed
+  // overflow.
+  if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
+      B.buildConstant(Carry, 0);
+    };
+    return true;
+  }
+
+  ConstantRange CRLHS =
+      ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
+  ConstantRange CRRHS =
+      ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
+
+  switch (CRLHS.signedAddMayOverflow(CRRHS)) {
+  case ConstantRange::OverflowResult::MayOverflow:
+    return false;
+  case ConstantRange::OverflowResult::NeverOverflows: {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
+      B.buildConstant(Carry, 0);
+    };
+    return true;
+  }
+  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+  case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildAdd(Dst, LHS, RHS);
+      B.buildConstant(Carry, 1);
+    };
+    return true;
+  }
+  }
+
+  return false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 94fdb37e..5cdc792 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -1770,6 +1771,41 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
   return true;
 }
 
+bool IRTranslator::translateVectorInterleave2Intrinsic(
+    const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+  assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2 &&
+         "This function can only be called on the interleave2 intrinsic!");
+  // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
+  Register Op0 = getOrCreateVReg(*CI.getOperand(0));
+  Register Op1 = getOrCreateVReg(*CI.getOperand(1));
+  Register Res = getOrCreateVReg(CI);
+
+  LLT OpTy = MRI->getType(Op0);
+  MIRBuilder.buildShuffleVector(Res, Op0, Op1,
+                                createInterleaveMask(OpTy.getNumElements(), 2));
+
+  return true;
+}
+
+bool IRTranslator::translateVectorDeinterleave2Intrinsic(
+    const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+  assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2 &&
+         "This function can only be called on the deinterleave2 intrinsic!");
+  // Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
+  // SelectionDAG).
+  Register Op = getOrCreateVReg(*CI.getOperand(0));
+  auto Undef = MIRBuilder.buildUndef(MRI->getType(Op));
+  ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+  LLT ResTy = MRI->getType(Res[0]);
+  MIRBuilder.buildShuffleVector(Res[0], Op, Undef,
+                                createStrideMask(0, 2, ResTy.getNumElements()));
+  MIRBuilder.buildShuffleVector(Res[1], Op, Undef,
+                                createStrideMask(1, 2, ResTy.getNumElements()));
+
+  return true;
+}
+
 void IRTranslator::getStackGuard(Register DstReg,
                                  MachineIRBuilder &MIRBuilder) {
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -2474,6 +2510,21 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
 
     return true;
   }
+
+  case Intrinsic::experimental_vector_interleave2:
+  case Intrinsic::experimental_vector_deinterleave2: {
+    // Both intrinsics have at least one operand.
+    Value *Op0 = CI.getOperand(0);
+    LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout());
+    if (!ResTy.isFixedVector())
+      return false;
+
+    if (CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+      return translateVectorInterleave2Intrinsic(CI, MIRBuilder);
+
+    return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
+  }
+
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index c536ec9..cc5aad1 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -580,7 +580,7 @@ PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = ExitBranch->getParent();
-  IRBuilder<> Builder(Header->getFirstNonPHI());
+  IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
   PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
   Index->addIncoming(NumElts, Preheader);
   Index->addIncoming(EltsRem, Latch);
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
index bc2ea3f..5caf20a 100644
--- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -39,6 +39,9 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
     return LLT::scalar(SizeInBits);
   }
 
+  if (Ty.isTokenTy())
+    return LLT::token();
+
   return LLT();
 }
 
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index c659379..a4c87a7 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -155,9 +155,6 @@ MachineCombiner::getOperandDef(const MachineOperand &MO) {
   // We need a virtual register definition.
   if (MO.isReg() && MO.getReg().isVirtual())
     DefInstr = MRI->getUniqueVRegDef(MO.getReg());
-  // PHI's have no depth etc.
-  if (DefInstr && DefInstr->isPHI())
-    DefInstr = nullptr;
   return DefInstr;
 }
 
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 1492c8c..1019c53 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -198,6 +198,23 @@ MDNode *MachineLoop::getLoopID() const {
   return LoopID;
 }
 
+bool MachineLoop::isLoopInvariantImplicitPhysReg(Register Reg) const {
+  MachineFunction *MF = getHeader()->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+  if (MRI->isConstantPhysReg(Reg))
+    return true;
+
+  if (!MF->getSubtarget()
+           .getRegisterInfo()
+           ->shouldAnalyzePhysregInMachineLoopInfo(Reg))
+    return false;
+
+  return !llvm::any_of(
+      MRI->def_instructions(Reg),
+      [this](const MachineInstr &MI) { return this->contains(&MI); });
+}
+
 bool MachineLoop::isLoopInvariant(MachineInstr &I,
                                   const Register ExcludeReg) const {
   MachineFunction *MF = I.getParent()->getParent();
@@ -226,7 +243,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I,
         // it could get allocated to something with a def during allocation.
         // However, if the physreg is known to always be caller saved/restored
         // then this use is safe to hoist.
-        if (!MRI->isConstantPhysReg(Reg) &&
+        if (!isLoopInvariantImplicitPhysReg(Reg) &&
             !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
             !TII->isIgnorableUse(MO))
           return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 40b078a..b6a5925 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -597,8 +597,8 @@ namespace {
     SDValue foldSextSetcc(SDNode *N);
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
-    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
-    SDValue foldABSToABD(SDNode *N);
+    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
+    SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
     SDValue unfoldMaskedMerge(SDNode *N);
     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -3596,7 +3596,7 @@ static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
 
 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
 // usubsat(a,b), optionally as a truncated type.
-SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
+SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
   if (N->getOpcode() != ISD::SUB ||
       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
     return SDValue();
@@ -3611,18 +3611,18 @@ SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
     SDValue MaxLHS = Op0.getOperand(0);
     SDValue MaxRHS = Op0.getOperand(1);
     if (MaxLHS == Op1)
-      return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
+      return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
     if (MaxRHS == Op1)
-      return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
+      return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
   }
 
   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
     SDValue MinLHS = Op1.getOperand(0);
     SDValue MinRHS = Op1.getOperand(1);
     if (MinLHS == Op0)
-      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
+      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
     if (MinRHS == Op0)
-      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
+      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
   }
 
   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
@@ -3633,10 +3633,10 @@ SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
-                                 DAG, SDLoc(N));
+                                 DAG, DL);
     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
-                                 DAG, SDLoc(N));
+                                 DAG, DL);
   }
 
   return SDValue();
@@ -3831,7 +3831,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
-  if (SDValue V = foldSubToUSubSat(VT, N))
+  if (SDValue V = foldSubToUSubSat(VT, N, DL))
     return V;
 
   // (A - B) - 1  ->  add (xor B, -1), A
@@ -6655,35 +6655,25 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
 
 /// For targets that support usubsat, match a bit-hack form of that operation
 /// that ends in 'and' and convert it.
-static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  EVT VT = N1.getValueType();
-
-  // Canonicalize SRA as operand 1.
-  if (N0.getOpcode() == ISD::SRA)
-    std::swap(N0, N1);
-
-  // xor/add with SMIN (signmask) are logically equivalent.
-  if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
-    return SDValue();
-
-  if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
-      N0.getOperand(0) != N1.getOperand(0))
-    return SDValue();
-
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
+  EVT VT = N->getValueType(0);
   unsigned BitWidth = VT.getScalarSizeInBits();
-  ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
-  ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
-  if (!XorC || !XorC->getAPIntValue().isSignMask() ||
-      !SraC || SraC->getAPIntValue() != BitWidth - 1)
-    return SDValue();
+  APInt SignMask = APInt::getSignMask(BitWidth);
 
   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
-  SDLoc DL(N);
-  SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
-  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+  // xor/add with SMIN (signmask) are logically equivalent.
+  SDValue X;
+  if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
+                         m_OneUse(m_Sra(m_Deferred(X),
+                                        m_SpecificInt(BitWidth - 1))))) &&
+      !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
+                         m_OneUse(m_Sra(m_Deferred(X),
+                                        m_SpecificInt(BitWidth - 1))))))
+    return SDValue();
+
+  return DAG.getNode(ISD::USUBSAT, DL, VT, X,
+                     DAG.getConstant(SignMask, DL, VT));
 }
 
 /// Given a bitwise logic operation N with a matching bitwise logic operand,
@@ -6773,34 +6763,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N1.getValueType();
+  SDLoc DL(N);
 
   // x & x --> x
   if (N0 == N1)
     return N0;
 
   // fold (and c1, c2) -> c1&c2
-  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
     return C;
 
   // canonicalize constant to RHS
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
-    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+    return DAG.getNode(ISD::AND, DL, VT, N1, N0);
 
   if (areBitwiseNotOfEachother(N0, N1))
-    return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
-                           VT);
+    return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
 
   // fold vector ops
   if (VT.isVector()) {
-    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
       return FoldedVOp;
 
     // fold (and x, 0) -> 0, vector edition
     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       // do not return N1, because undef node may exist in N1
-      return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
-                             SDLoc(N), N1.getValueType());
+      return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
+                             N1.getValueType());
 
     // fold (and x, -1) -> x, vector edition
     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
@@ -6820,8 +6810,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         uint64_t ElementSize =
             LoadVT.getVectorElementType().getScalarSizeInBits();
         if (Splat->getAPIntValue().isMask(ElementSize)) {
-          auto NewLoad = DAG.getMaskedLoad(
-              ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+          SDValue NewLoad = DAG.getMaskedLoad(
+              ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
               MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
               LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
               ISD::ZEXTLOAD, MLoad->isExpandingLoad());
@@ -6843,7 +6833,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   unsigned BitWidth = VT.getScalarSizeInBits();
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
-    return DAG.getConstant(0, SDLoc(N), VT);
+    return DAG.getConstant(0, DL, VT);
 
   if (SDValue R = foldAndOrOfSETCC(N, DAG))
     return R;
@@ -6852,12 +6842,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     return NewSel;
 
   // reassociate and
-  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
     return RAND;
 
   // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
-  if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
-                                        VT, N0, N1))
+  if (SDValue SD =
+          reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
     return SD;
 
   // fold (and (or x, C), D) -> D if (C & D) == D
@@ -6877,18 +6867,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
 
     // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
     if (DAG.MaskedValueIsZero(N0Op0, Mask))
-      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
+      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
 
     // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
     if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
         TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
         TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
-        TLI.isNarrowingProfitable(VT, SrcVT)) {
-      SDLoc DL(N);
+        TLI.isNarrowingProfitable(VT, SrcVT))
       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
                          DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
                                      DAG.getZExtOrTrunc(N1, DL, SrcVT)));
-    }
   }
 
   // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
@@ -6900,7 +6888,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
         DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
         N0->hasOneUse() && N0Op0->hasOneUse()) {
-      SDLoc DL(N);
       SDValue NewMask =
           DAG.getNode(ISD::AND, DL, VT, N1,
                       DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
@@ -6921,8 +6908,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
        N0.getOperand(0).getOpcode() == ISD::LOAD &&
        N0.getOperand(0).getResNo() == 0) ||
       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
-    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
-                                         N0 : N0.getOperand(0) );
+    auto *Load =
+        cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
 
     // Get the constant (if applicable) the zero'th operand is being ANDed with.
     // This can be a pure constant or a vector splat, in which case we treat the
@@ -7032,9 +7019,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       //    (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
       // => (extract_subvector (iN_zeroext v))
       SDValue ZeroExtExtendee =
-          DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
+          DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
 
-      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
                          N0.getOperand(1));
     }
   }
@@ -7051,8 +7038,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
 
       SDValue ZExtLoad = DAG.getMaskedGather(
-          DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
-          GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+          DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
+          GN0->getIndexType(), ISD::ZEXTLOAD);
 
       CombineTo(N, ZExtLoad);
       AddToWorklist(ZExtLoad.getNode());
@@ -7104,7 +7091,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         return SubRHS;
       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
-        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
     }
   }
 
@@ -7118,7 +7105,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
       (ISD::isEXTLoad(N0.getNode()) ||
        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    auto *LN0 = cast<LoadSDNode>(N0);
     EVT MemVT = LN0->getMemoryVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
@@ -7173,10 +7160,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
 
   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
   if (IsAndZeroExtMask(N0, N1))
-    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
 
   if (hasOperation(ISD::USUBSAT, VT))
-    if (SDValue V = foldAndToUsubsat(N, DAG))
+    if (SDValue V = foldAndToUsubsat(N, DAG, DL))
       return V;
 
   // Postpone until legalization completed to avoid interference with bswap
@@ -10733,7 +10720,7 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
 // (ABS (SUB (EXTEND a), (EXTEND b))).
 // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
 // Generates UABD/SABD instruction.
-SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
   EVT SrcVT = N->getValueType(0);
 
   if (N->getOpcode() == ISD::TRUNCATE)
@@ -10745,7 +10732,6 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue AbsOp1 = N->getOperand(0);
   SDValue Op0, Op1;
-  SDLoc DL(N);
 
   if (AbsOp1.getOpcode() != ISD::SUB)
     return SDValue();
@@ -10804,9 +10790,10 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
 SDValue DAGCombiner::visitABS(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
 
   // fold (abs c1) -> c2
-  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
+  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
     return C;
   // fold (abs (abs x)) -> (abs x)
   if (N0.getOpcode() == ISD::ABS)
@@ -10815,7 +10802,7 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
   if (DAG.SignBitIsZero(N0))
     return N0;
 
-  if (SDValue ABD = foldABSToABD(N))
+  if (SDValue ABD = foldABSToABD(N, DL))
     return ABD;
 
   // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
@@ -10825,7 +10812,6 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
     if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
         TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
         hasOperation(ISD::ABS, ExtVT)) {
-      SDLoc DL(N);
       return DAG.getNode(
           ISD::ZERO_EXTEND, DL, VT,
           DAG.getNode(ISD::ABS, DL, ExtVT,
@@ -14621,6 +14607,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT SrcVT = N0.getValueType();
   bool isLE = DAG.getDataLayout().isLittleEndian();
+  SDLoc DL(N);
 
   // trunc(undef) = undef
   if (N0.isUndef())
@@ -14628,10 +14615,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
 
   // fold (truncate (truncate x)) -> (truncate x)
   if (N0.getOpcode() == ISD::TRUNCATE)
-    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
 
   // fold (truncate c1) -> c1
-  if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
+  if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
     return C;
 
   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
@@ -14640,10 +14627,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       N0.getOpcode() == ISD::ANY_EXTEND) {
     // if the source is smaller than the dest, we still need an extend.
     if (N0.getOperand(0).getValueType().bitsLT(VT))
-      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
     // if the source is larger than the dest, than we just need the truncate.
     if (N0.getOperand(0).getValueType().bitsGT(VT))
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+      return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
     // if the source and dest are the same type, we can drop both the extend
     // and the truncate.
     return N0.getOperand(0);
@@ -14657,8 +14644,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     SDValue ExtVal = N0.getOperand(1);
     EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
     if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
-      SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
-      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
+      SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
     }
   }
 
@@ -14693,8 +14680,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
       int Elt = EltNo->getAsZExtVal();
       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
-
-      SDLoc DL(N);
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                          DAG.getBitcast(NVT, N0.getOperand(0)),
                          DAG.getVectorIdxConstant(Index, DL));
@@ -14709,7 +14694,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       SDValue Cond = N0.getOperand(0);
       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
-      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+      return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
     }
   }
 
@@ -14721,22 +14706,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     KnownBits Known = DAG.computeKnownBits(Amt);
     unsigned Size = VT.getScalarSizeInBits();
     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
-      SDLoc SL(N);
       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
       if (AmtVT != Amt.getValueType()) {
-        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+        Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
         AddToWorklist(Amt.getNode());
       }
-      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
+      return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
     }
   }
 
-  if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
+  if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
     return V;
 
-  if (SDValue ABD = foldABSToABD(N))
+  if (SDValue ABD = foldABSToABD(N, DL))
     return ABD;
 
   // Attempt to pre-truncate BUILD_VECTOR sources.
@@ -14745,7 +14728,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
       // Avoid creating illegal types if running after type legalizer.
       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
-    SDLoc DL(N);
     EVT SVT = VT.getScalarType();
     SmallVector<SDValue, 8> TruncOps;
     for (const SDValue &Op : N0->op_values()) {
@@ -14759,7 +14741,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
       (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
-    SDLoc DL(N);
     EVT SVT = VT.getScalarType();
     return DAG.getSplatVector(
         VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
@@ -14791,7 +14772,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
         Opnds.push_back(BuildVect.getOperand(i));
 
-      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+      return DAG.getBuildVector(VT, DL, Opnds);
     }
   }
 
@@ -14854,7 +14835,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
         AddToWorklist(NV.getNode());
         Opnds.push_back(NV);
       }
-      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
     }
   }
 
@@ -14868,11 +14849,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
         (!LegalOperations ||
          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
-      SDLoc SL(N);
-
       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
-      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
-                         DAG.getVectorIdxConstant(Idx, SL));
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
+                         DAG.getVectorIdxConstant(Idx, DL));
     }
   }
 
@@ -14917,7 +14896,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       // we are extra cautious to not create an unsupported operation.
       // Target-specific changes are likely needed to avoid regressions here.
       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
-        SDLoc DL(N);
         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
@@ -14934,7 +14912,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
          TLI.isOperationLegal(N0.getOpcode(), VT)) &&
         N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
-      SDLoc DL(N);
       SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
       SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
       SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
@@ -14951,7 +14928,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
             VT.getScalarSizeInBits() &&
         hasOperation(N0.getOpcode(), VT)) {
       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
-                                 DAG, SDLoc(N));
+                                 DAG, DL);
     }
     break;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 909c669..52e12cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4527,6 +4527,7 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
 void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
+  unsigned Opc = N->getOpcode();
   SDLoc dl(N);
 
   // If we can emit an efficient shift operation, do so now.  Check to see if
@@ -4541,12 +4542,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
 
   // If this target supports shift_PARTS, use it.  First, map to the _PARTS opc.
   unsigned PartsOpc;
-  if (N->getOpcode() == ISD::SHL) {
+  if (Opc == ISD::SHL) {
     PartsOpc = ISD::SHL_PARTS;
-  } else if (N->getOpcode() == ISD::SRL) {
+  } else if (Opc == ISD::SRL) {
     PartsOpc = ISD::SRL_PARTS;
   } else {
-    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    assert(Opc == ISD::SRA && "Unknown shift!");
     PartsOpc = ISD::SRA_PARTS;
   }
 
@@ -4599,7 +4600,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
   // Otherwise, emit a libcall.
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   bool isSigned;
-  if (N->getOpcode() == ISD::SHL) {
+  if (Opc == ISD::SHL) {
     isSigned = false; /*sign irrelevant*/
     if (VT == MVT::i16)
       LC = RTLIB::SHL_I16;
@@ -4609,7 +4610,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
       LC = RTLIB::SHL_I64;
     else if (VT == MVT::i128)
       LC = RTLIB::SHL_I128;
-  } else if (N->getOpcode() == ISD::SRL) {
+  } else if (Opc == ISD::SRL) {
     isSigned = false;
     if (VT == MVT::i16)
       LC = RTLIB::SRL_I16;
@@ -4620,7 +4621,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
     else if (VT == MVT::i128)
       LC = RTLIB::SRL_I128;
   } else {
-    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    assert(Opc == ISD::SRA && "Unknown shift!");
     isSigned = true;
     if (VT == MVT::i16)
       LC = RTLIB::SRA_I16;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b8c7d08..19f9354 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6039,30 +6039,14 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
     APInt C2Ext = C2.zext(FullWidth);
     return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
   }
-  case ISD::AVGFLOORS: {
-    unsigned FullWidth = C1.getBitWidth() + 1;
-    APInt C1Ext = C1.sext(FullWidth);
-    APInt C2Ext = C2.sext(FullWidth);
-    return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
-  }
-  case ISD::AVGFLOORU: {
-    unsigned FullWidth = C1.getBitWidth() + 1;
-    APInt C1Ext = C1.zext(FullWidth);
-    APInt C2Ext = C2.zext(FullWidth);
-    return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
-  }
-  case ISD::AVGCEILS: {
-    unsigned FullWidth = C1.getBitWidth() + 1;
-    APInt C1Ext = C1.sext(FullWidth);
-    APInt C2Ext = C2.sext(FullWidth);
-    return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
-  }
-  case ISD::AVGCEILU: {
-    unsigned FullWidth = C1.getBitWidth() + 1;
-    APInt C1Ext = C1.zext(FullWidth);
-    APInt C2Ext = C2.zext(FullWidth);
-    return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
-  }
+  case ISD::AVGFLOORS:
+    return APIntOps::avgFloorS(C1, C2);
+  case ISD::AVGFLOORU:
+    return APIntOps::avgFloorU(C1, C2);
+  case ISD::AVGCEILS:
+    return APIntOps::avgCeilS(C1, C2);
+  case ISD::AVGCEILU:
+    return APIntOps::avgCeilU(C1, C2);
   case ISD::ABDS:
     return APIntOps::abds(C1, C2);
   case ISD::ABDU:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c78c3ed..30f67d3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1461,7 +1461,7 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
     if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
                                 DI->getVariable(), DI->getDebugLoc()))
       FuncInfo.PreprocessedDbgDeclares.insert(DI);
-    for (const DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+    for (const DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
       if (DPV.Type == DPValue::LocationType::Declare &&
           processDbgDeclare(FuncInfo, DPV.getVariableLocationOp(0),
                             DPV.getExpression(), DPV.getVariable(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b3dc9de..57f8fc4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10694,7 +10694,7 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
 
       SDValue Lo, Hi;
       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
-      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
       VT = HalfVT;
     }
   }
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6943ce2..15b5942 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2680,21 +2680,34 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
 
 MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
     const MCSymbol *Sym, const TargetMachine &TM) const {
-  // Use TE storage-mapping class when large code model is enabled so that
-  // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
-  // EH info is never referenced directly using instructions so it can be
-  // allocated with TE storage-mapping class.
-  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC, otherwise
-  // the AIX assembler will complain.
+  const XCOFF::StorageMappingClass SMC = [](const MCSymbol *Sym,
+                                            const TargetMachine &TM) {
+    const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(Sym);
+
+    // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC,
+    // otherwise the AIX assembler will complain.
+    if (XSym->getSymbolTableName() == "_$TLSML")
+      return XCOFF::XMC_TC;
+
+    // Use large code model toc entries for ehinfo symbols as they are
+    // never referenced directly. The runtime loads their TOC entry
+    // addresses from the trace-back table.
+    if (XSym->isEHInfo())
+      return XCOFF::XMC_TE;
+
+    // If the symbol does not have a code model specified use the module value.
+    if (!XSym->hasPerSymbolCodeModel())
+      return TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE
+                                                   : XCOFF::XMC_TC;
+
+    return XSym->getPerSymbolCodeModel() == MCSymbolXCOFF::CM_Large
+               ? XCOFF::XMC_TE
+               : XCOFF::XMC_TC;
+  }(Sym, TM);
+
   return getContext().getXCOFFSection(
       cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
-      XCOFF::CsectProperties(
-          ((TM.getCodeModel() == CodeModel::Large &&
-            cast<MCSymbolXCOFF>(Sym)->getSymbolTableName() != "_$TLSML") ||
-           cast<MCSymbolXCOFF>(Sym)->isEHInfo())
-              ? XCOFF::XMC_TE
-              : XCOFF::XMC_TC,
-          XCOFF::XTY_SD));
+      XCOFF::CsectProperties(SMC, XCOFF::XTY_SD));
 }
 
 MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
diff --git a/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp b/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp
index 5f82f81..16c4fbe 100644
--- a/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp
@@ -48,7 +48,7 @@ Error LVReaderHandler::createReader(StringRef Filename, LVReaders &Readers,
         return std::make_unique<LVCodeViewReader>(Filename, FileFormatName,
                                                   *COFF, W, ExePath);
       }
-      if (Obj.isELF() || Obj.isMachO())
+      if (Obj.isELF() || Obj.isMachO() || Obj.isWasm())
         return std::make_unique<LVELFReader>(Filename, FileFormatName, Obj, W);
     }
     if (isa<PDBFile *>(Input)) {
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
index a0cd8b7..2d46414 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
@@ -146,6 +146,30 @@ bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) {
 
 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
   for (const object::SectionRef &Section : Obj.sections()) {
+    LLVM_DEBUG({
+      Expected<StringRef> SectionNameOrErr = Section.getName();
+      StringRef Name;
+      if (!SectionNameOrErr)
+        consumeError(SectionNameOrErr.takeError());
+      else
+        Name = *SectionNameOrErr;
+      dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", "
+             << "Address: " << hexValue(Section.getAddress()) << ", "
+             << "Size: " << hexValue(Section.getSize()) << ", "
+             << "Name: " << Name << "\n";
+      dbgs() << "isCompressed:   " << Section.isCompressed() << ", "
+             << "isText:         " << Section.isText() << ", "
+             << "isData:         " << Section.isData() << ", "
+             << "isBSS:          " << Section.isBSS() << ", "
+             << "isVirtual:      " << Section.isVirtual() << "\n";
+      dbgs() << "isBitcode:      " << Section.isBitcode() << ", "
+             << "isStripped:     " << Section.isStripped() << ", "
+             << "isBerkeleyText: " << Section.isBerkeleyText() << ", "
+             << "isBerkeleyData: " << Section.isBerkeleyData() << ", "
+             << "isDebugSection: " << Section.isDebugSection() << "\n";
+      dbgs() << "\n";
+    });
+
     if (!Section.isText() || Section.isVirtual() || !Section.getSize())
       continue;
 
@@ -161,8 +185,14 @@ void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
       continue;
     }
     if ((*SectionNameOrErr).equals(".text") ||
-        (*SectionNameOrErr).equals(".code"))
+        (*SectionNameOrErr).equals("CODE") ||
+        (*SectionNameOrErr).equals(".code")) {
       DotTextSectionIndex = Section.getIndex();
+      // If the object is WebAssembly, update the address offset that
+      // will be added to DWARF DW_AT_* attributes.
+      if (Obj.isWasm())
+        WasmCodeSectionOffset = Section.getAddress();
+    }
   }
 
   // Process the symbol table.
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
index 4469092..6bdcbc0 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This implements the LVELFReader class.
-// It supports ELF and Mach-O formats.
+// It supports ELF, Mach-O and Wasm binary formats.
 //
 //===----------------------------------------------------------------------===//
 
@@ -415,6 +415,8 @@ void LVELFReader::processOneAttribute(const DWARFDie &Die, LVOffset *OffsetPtr,
       if (FoundLowPC) {
         if (CurrentLowPC == MaxAddress)
           CurrentElement->setIsDiscarded();
+        // Consider the case of WebAssembly.
+        CurrentLowPC += WasmCodeSectionOffset;
         if (CurrentElement->isCompileUnit())
           setCUBaseAddress(CurrentLowPC);
       }
@@ -429,10 +431,17 @@ void LVELFReader::processOneAttribute(const DWARFDie &Die, LVOffset *OffsetPtr,
         CurrentHighPC = *Address;
       if (std::optional<uint64_t> Offset = FormValue.getAsUnsignedConstant())
         // High PC is an offset from LowPC.
-        CurrentHighPC = CurrentLowPC + *Offset;
+        // Don't add the WebAssembly offset if we have seen a DW_AT_low_pc, as
+        // the CurrentLowPC has already that offset added. Basically, use the
+        // original DW_AT_loc_pc value.
+        CurrentHighPC =
+            (FoundLowPC ? CurrentLowPC - WasmCodeSectionOffset : CurrentLowPC) +
+            *Offset;
       // Store the real upper limit for the address range.
       if (UpdateHighAddress && CurrentHighPC > 0)
         --CurrentHighPC;
+      // Consider the case of WebAssembly.
+      CurrentHighPC += WasmCodeSectionOffset;
       if (CurrentElement->isCompileUnit())
         setCUHighAddress(CurrentHighPC);
     }
@@ -466,6 +475,9 @@ void LVELFReader::processOneAttribute(const DWARFDie &Die, LVOffset *OffsetPtr,
         // Store the real upper limit for the address range.
         if (UpdateHighAddress && Range.HighPC > 0)
           --Range.HighPC;
+        // Consider the case of WebAssembly.
+        Range.LowPC += WasmCodeSectionOffset;
+        Range.HighPC += WasmCodeSectionOffset;
         // Add the pair of addresses.
         CurrentScope->addObject(Range.LowPC, Range.HighPC);
         // If the scope is the CU, do not update the ranges set.
@@ -735,7 +747,8 @@ void LVELFReader::createLineAndFileRecords(
       // and they will be released when its scope parent is deleted.
       LVLineDebug *Line = createLineDebug();
       CULines.push_back(Line);
-      Line->setAddress(Row.Address.Address);
+      // Consider the case of WebAssembly.
+      Line->setAddress(Row.Address.Address + WasmCodeSectionOffset);
       Line->setFilename(
           CompileUnit->getFilename(IncrementIndex ? Row.File + 1 : Row.File));
       Line->setLineNumber(Row.Line);
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 5f29226..32c5e32 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -628,13 +628,20 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
   ObjectPair Objects = ObjectsOrErr.get();
 
   std::unique_ptr<DIContext> Context;
-  // If this is a COFF object containing PDB info, use a PDBContext to
-  // symbolize. Otherwise, use DWARF.
+  // If this is a COFF object containing PDB info and not containing DWARF
+  // section, use a PDBContext to symbolize. Otherwise, use DWARF.
   if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
     const codeview::DebugInfo *DebugInfo;
     StringRef PDBFileName;
     auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
-    if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) {
+    // Use DWARF if there're DWARF sections.
+    bool HasDwarf =
+        llvm::any_of(Objects.first->sections(), [](SectionRef Section) -> bool {
+          if (Expected<StringRef> SectionName = Section.getName())
+            return SectionName.get() == ".debug_info";
+          return false;
+        });
+    if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
       using namespace pdb;
       std::unique_ptr<IPDBSession> Session;
 
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 994acf5..1fa8a12 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -255,7 +255,7 @@ struct ObjCImageInfoFlags {
 namespace llvm {
 namespace orc {
 
-MachOPlatform::HeaderOptions::BuildVersionOpts
+std::optional<MachOPlatform::HeaderOptions::BuildVersionOpts>
 MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(const Triple &TT,
                                                            uint32_t MinOS,
                                                            uint32_t SDK) {
@@ -263,26 +263,25 @@ MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(const Triple &TT,
   uint32_t Platform;
   switch (TT.getOS()) {
   case Triple::IOS:
-    Platform = TT.isSimulatorEnvironment() ? MachO::PLATFORM_IOS
-                                           : MachO::PLATFORM_IOSSIMULATOR;
+    Platform = TT.isSimulatorEnvironment() ? MachO::PLATFORM_IOSSIMULATOR
+                                           : MachO::PLATFORM_IOS;
     break;
   case Triple::MacOSX:
     Platform = MachO::PLATFORM_MACOS;
     break;
   case Triple::TvOS:
-    Platform = TT.isSimulatorEnvironment() ? MachO::PLATFORM_TVOS
-                                           : MachO::PLATFORM_TVOSSIMULATOR;
+    Platform = TT.isSimulatorEnvironment() ? MachO::PLATFORM_TVOSSIMULATOR
+                                           : MachO::PLATFORM_TVOS;
     break;
   case Triple::WatchOS:
-    Platform = TT.isSimulatorEnvironment() ? MachO::PLATFORM_WATCHOS
-                                           : MachO::PLATFORM_WATCHOSSIMULATOR;
+    Platform = TT.isSimulatorEnvironment() ? MachO::PLATFORM_WATCHOSSIMULATOR
+                                           : MachO::PLATFORM_WATCHOS;
     break;
   default:
-    Platform = MachO::PLATFORM_UNKNOWN;
-    break;
+    return std::nullopt;
   }
 
-  return {Platform, MinOS, SDK};
+  return MachOPlatform::HeaderOptions::BuildVersionOpts{Platform, MinOS, SDK};
 }
 
 Expected<std::unique_ptr<MachOPlatform>> MachOPlatform::Create(
@@ -1725,11 +1724,9 @@ jitlink::Block &createHeaderBlock(MachOPlatform &MOP,
   else
     B.template addLoadCommand<MachO::LC_ID_DYLIB>(JD.getName(), 0, 0, 0);
 
-  if (Opts.BuildVersion)
+  for (auto &BV : Opts.BuildVersions)
     B.template addLoadCommand<MachO::LC_BUILD_VERSION>(
-        Opts.BuildVersion->Platform, Opts.BuildVersion->MinOS,
-        Opts.BuildVersion->SDK, static_cast<uint32_t>(0));
-
+        BV.Platform, BV.MinOS, BV.SDK, static_cast<uint32_t>(0));
   for (auto &D : Opts.LoadDylibs)
     B.template addLoadCommand<MachO::LC_LOAD_DYLIB>(
         D.Name, D.Timestamp, D.CurrentVersion, D.CompatibilityVersion);
diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
index e870c5a..37936d6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
@@ -44,6 +44,7 @@ OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) {
   case Triple::ppcle:
   case Triple::ppc64:
   case Triple::ppc64le:
+  case Triple::systemz:
   case Triple::x86:
   case Triple::x86_64:
     ActiveTraits.set(unsigned(TraitProperty::device_kind_cpu));
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2599239..be0abb4 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5178,72 +5178,6 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
 }
 
-// Check if the module attribute is present and not zero.
-static bool isModuleAttributeSet(Module &M, const StringRef &ModAttr) {
-  const auto *Attr =
-      mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(ModAttr));
-  return Attr && Attr->getZExtValue();
-}
-
-// Copy an attribute from module to the function if exists.
-// First value of the pair is used when the module attribute is not zero
-// the second otherwise.
-static void
-CopyModuleAttributeToFunction(Function &F, StringRef FnAttrName,
-                              StringRef ModAttrName,
-                              std::pair<StringRef, StringRef> Values) {
-  if (F.hasFnAttribute(FnAttrName))
-    return;
-  F.addFnAttr(FnAttrName, isModuleAttributeSet(*F.getParent(), ModAttrName)
-                              ? Values.first
-                              : Values.second);
-}
-
-// Copy a boolean attribute from module to the function if exists.
-// Module attribute treated false if zero otherwise true.
-static void CopyModuleAttributeToFunction(Function &F, StringRef AttrName) {
-  CopyModuleAttributeToFunction(
-      F, AttrName, AttrName,
-      std::make_pair<StringRef, StringRef>("true", "false"));
-}
-
-// Copy an attribute from module to the function if exists.
-// First value of the pair is used when the module attribute is not zero
-// the second otherwise.
-static void
-CopyModuleAttributeToFunction(Function &F, StringRef AttrName,
-                              std::pair<StringRef, StringRef> Values) {
-  CopyModuleAttributeToFunction(F, AttrName, AttrName, Values);
-}
-
-void llvm::CopyModuleAttrToFunctions(Module &M) {
-  Triple T(M.getTargetTriple());
-  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
-    return;
-
-  for (Function &F : M.getFunctionList()) {
-    if (F.isDeclaration())
-      continue;
-
-    if (!F.hasFnAttribute("sign-return-address")) {
-      StringRef SignType = "none";
-      if (isModuleAttributeSet(M, "sign-return-address"))
-        SignType = "non-leaf";
-
-      if (isModuleAttributeSet(M, "sign-return-address-all"))
-        SignType = "all";
-
-      F.addFnAttr("sign-return-address", SignType);
-    }
-    CopyModuleAttributeToFunction(F, "branch-target-enforcement");
-    CopyModuleAttributeToFunction(F, "branch-protection-pauth-lr");
-    CopyModuleAttributeToFunction(F, "guarded-control-stack");
-    CopyModuleAttributeToFunction(
-        F, "sign-return-address-key",
-        std::make_pair<StringRef, StringRef>("b_key", "a_key"));
-  }
-}
-
 static bool isOldLoopArgument(Metadata *MD) {
   auto *T = dyn_cast_or_null<MDTuple>(MD);
   if (!T)
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 4dd1bdd..3932576 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -36,6 +36,11 @@ cl::opt<bool>
                                  "through iterators, eliminating intrinsics"),
                         cl::init(true));
 
+bool WriteNewDbgInfoFormatToBitcode /*set default value in cl::init() below*/;
+cl::opt<bool, true> WriteNewDbgInfoFormatToBitcode2(
+    "write-experimental-debuginfo-iterators-to-bitcode", cl::Hidden,
+    cl::location(WriteNewDbgInfoFormatToBitcode), cl::init(true));
+
 DPMarker *BasicBlock::createMarker(Instruction *I) {
   assert(IsNewDbgInfoFormat &&
          "Tried to create a marker in a non new debug-info block!");
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index d168950..8587f17 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -1828,7 +1828,7 @@ void at::deleteAll(Function *F) {
   SmallVector<DPValue *, 12> DPToDelete;
   for (BasicBlock &BB : *F) {
     for (Instruction &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
         if (DPV.isDbgAssign())
           DPToDelete.push_back(&DPV);
       if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
@@ -2257,7 +2257,7 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
   };
   for (auto &BB : F) {
     for (auto &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
         if (DPV.isDbgDeclare())
           ProcessDeclare(&DPV, DPVDeclares);
       }
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 28f9665..f37add2 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1880,11 +1880,12 @@ DIExpression *DIExpression::append(const DIExpression *Expr,
 DIExpression *DIExpression::appendToStack(const DIExpression *Expr,
                                           ArrayRef<uint64_t> Ops) {
   assert(Expr && !Ops.empty() && "Can't append ops to this expression");
-  assert(none_of(Ops,
-                 [](uint64_t Op) {
-                   return Op == dwarf::DW_OP_stack_value ||
-                          Op == dwarf::DW_OP_LLVM_fragment;
-                 }) &&
+  assert(std::none_of(expr_op_iterator(Ops.begin()),
+                      expr_op_iterator(Ops.end()),
+                      [](auto Op) {
+                        return Op.getOp() == dwarf::DW_OP_stack_value ||
+                               Op.getOp() == dwarf::DW_OP_LLVM_fragment;
+                      }) &&
          "Can't append this op");
 
   // Append a DW_OP_deref after Expr's current op list if it's non-empty and
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b09b80f..d6746d1 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -918,12 +918,14 @@ CallInst *IRBuilderBase::CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
   return createCallHelper(Fn, {V}, Name, FMFSource);
 }
 
-CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS,
-                                               Value *RHS,
-                                               Instruction *FMFSource,
-                                               const Twine &Name) {
+Value *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS,
+                                            Value *RHS, Instruction *FMFSource,
+                                            const Twine &Name) {
   Module *M = BB->getModule();
   Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() });
+  if (Value *V = Folder.FoldBinaryIntrinsic(ID, LHS, RHS, Fn->getReturnType(),
+                                            FMFSource))
+    return V;
   return createCallHelper(Fn, {LHS, RHS}, Name, FMFSource);
 }
 
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index a9afb6f7..a7e6db8 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Linker/IRMover.h"
 #include "LinkDiagnosticInfo.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
@@ -1546,7 +1547,23 @@ Error IRLinker::run() {
     if (Error Err = SrcM->getMaterializer()->materializeMetadata())
       return Err;
 
-  DstM.IsNewDbgInfoFormat = SrcM->IsNewDbgInfoFormat;
+  // Convert source module to match dest for the duration of the link.
+  bool SrcModuleNewDbgFormat = SrcM->IsNewDbgInfoFormat;
+  if (DstM.IsNewDbgInfoFormat != SrcM->IsNewDbgInfoFormat) {
+    if (DstM.IsNewDbgInfoFormat)
+      SrcM->convertToNewDbgValues();
+    else
+      SrcM->convertFromNewDbgValues();
+  }
+  // Undo debug mode conversion afterwards.
+  auto Cleanup = make_scope_exit([&]() {
+    if (SrcModuleNewDbgFormat != SrcM->IsNewDbgInfoFormat) {
+      if (SrcModuleNewDbgFormat)
+        SrcM->convertToNewDbgValues();
+      else
+        SrcM->convertFromNewDbgValues();
+    }
+  });
 
   // Inherit the target data from the source module if the destination module
   // doesn't have one already.
@@ -1606,11 +1623,6 @@ Error IRLinker::run() {
   // Loop over all of the linked values to compute type mappings.
   computeTypeMapping();
 
-  // Convert module level attributes to function level attributes because
-  // after merging modules the attributes might change and would have different
-  // effect on the functions as the original module would have.
-  CopyModuleAttrToFunctions(*SrcM);
-
   std::reverse(Worklist.begin(), Worklist.end());
   while (!Worklist.empty()) {
     GlobalValue *GV = Worklist.back();
@@ -1780,8 +1792,6 @@ IRMover::IRMover(Module &M) : Composite(M) {
 Error IRMover::move(std::unique_ptr<Module> Src,
                     ArrayRef<GlobalValue *> ValuesToLink,
                     LazyCallback AddLazyFor, bool IsPerformingImport) {
-  if (getModule().IsNewDbgInfoFormat)
-    Src->convertToNewDbgValues();
   IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
                        std::move(Src), ValuesToLink, std::move(AddLazyFor),
                        IsPerformingImport);
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index aa57e55..97d0e4f 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -795,23 +795,31 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
       Entry.second = Entry.second > 1 ? 1 : 0;
   }
 
+  std::vector<std::unique_ptr<SymbolicFile>> SymFiles;
+
+  if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
+    for (const NewArchiveMember &M : NewMembers) {
+      Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
+          getSymbolicFile(M.Buf->getMemBufferRef(), Context);
+      if (!SymFileOrErr)
+        return createFileError(M.MemberName, SymFileOrErr.takeError());
+      SymFiles.push_back(std::move(*SymFileOrErr));
+    }
+  }
+
   // The big archive format needs to know the offset of the previous member
   // header.
   uint64_t PrevOffset = 0;
   uint64_t NextMemHeadPadSize = 0;
-  std::unique_ptr<SymbolicFile> CurSymFile;
-  std::unique_ptr<SymbolicFile> NextSymFile;
-  uint16_t Index = 0;
 
-  for (auto M = NewMembers.begin(); M < NewMembers.end(); ++M) {
+  for (uint32_t Index = 0; Index < NewMembers.size(); ++Index) {
+    const NewArchiveMember *M = &NewMembers[Index];
     std::string Header;
     raw_string_ostream Out(Header);
 
     MemoryBufferRef Buf = M->Buf->getMemBufferRef();
     StringRef Data = Thin ? "" : Buf.getBuffer();
 
-    Index++;
-
     // ld64 expects the members to be 8-byte aligned for 64-bit content and at
     // least 4-byte aligned for 32-bit content.  Opt for the larger encoding
     // uniformly.  This matches the behaviour with cctools and ensures that ld64
@@ -837,29 +845,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
           std::move(StringMsg), object::object_error::parse_failed);
     }
 
-    if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
-      auto SetNextSymFile = [&NextSymFile,
-                             &Context](MemoryBufferRef Buf,
-                                       StringRef MemberName) -> Error {
-        Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
-            getSymbolicFile(Buf, Context);
-        if (!SymFileOrErr)
-          return createFileError(MemberName, SymFileOrErr.takeError());
-        NextSymFile = std::move(*SymFileOrErr);
-        return Error::success();
-      };
-
-      if (M == NewMembers.begin())
-        if (Error Err = SetNextSymFile(Buf, M->MemberName))
-          return std::move(Err);
-
-      CurSymFile = std::move(NextSymFile);
-
-      if ((M + 1) != NewMembers.end())
-        if (Error Err = SetNextSymFile((M + 1)->Buf->getMemBufferRef(),
-                                       (M + 1)->MemberName))
-          return std::move(Err);
-    }
+    std::unique_ptr<SymbolicFile> CurSymFile;
+    if (!SymFiles.empty())
+      CurSymFile = std::move(SymFiles[Index]);
 
     // In the big archive file format, we need to calculate and include the next
     // member offset and previous member offset in the file member header.
@@ -880,13 +868,13 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 
       // If there is another member file after this, we need to calculate the
       // padding before the header.
-      if ((M + 1) != NewMembers.end()) {
-        uint64_t OffsetToNextMemData = NextOffset +
-                                       sizeof(object::BigArMemHdrType) +
-                                       alignTo((M + 1)->MemberName.size(), 2);
+      if (Index + 1 != SymFiles.size()) {
+        uint64_t OffsetToNextMemData =
+            NextOffset + sizeof(object::BigArMemHdrType) +
+            alignTo(NewMembers[Index + 1].MemberName.size(), 2);
         NextMemHeadPadSize =
             alignToPowerOf2(OffsetToNextMemData,
-                            getMemberAlignment(NextSymFile.get())) -
+                            getMemberAlignment(SymFiles[Index + 1].get())) -
             OffsetToNextMemData;
         NextOffset += NextMemHeadPadSize;
       }
@@ -902,7 +890,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
     std::vector<unsigned> Symbols;
     if (NeedSymbols != SymtabWritingMode::NoSymtab) {
       Expected<std::vector<unsigned>> SymbolsOrErr =
-          getSymbols(CurSymFile.get(), Index, SymNames, SymMap);
+          getSymbols(CurSymFile.get(), Index + 1, SymNames, SymMap);
       if (!SymbolsOrErr)
         return createFileError(M->MemberName, SymbolsOrErr.takeError());
       Symbols = std::move(*SymbolsOrErr);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index e757c73..d9fe88a 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -60,16 +60,16 @@ public:
   // \c patch can only be called when all data is written and flushed.
   // For raw_string_ostream, the patch is done on the target string
   // directly and it won't be reflected in the stream's internal buffer.
-  void patch(PatchItem *P, int NItems) {
+  void patch(ArrayRef<PatchItem> P) {
     using namespace support;
 
     if (IsFDOStream) {
       raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
       const uint64_t LastPos = FDOStream.tell();
-      for (int K = 0; K < NItems; K++) {
-        FDOStream.seek(P[K].Pos);
-        for (int I = 0; I < P[K].N; I++)
-          write(P[K].D[I]);
+      for (const auto &K : P) {
+        FDOStream.seek(K.Pos);
+        for (int I = 0; I < K.N; I++)
+          write(K.D[I]);
       }
       // Reset the stream to the last position after patching so that users
       // don't accidentally overwrite data. This makes it consistent with
@@ -78,11 +78,11 @@ public:
     } else {
       raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
       std::string &Data = SOStream.str(); // with flush
-      for (int K = 0; K < NItems; K++) {
-        for (int I = 0; I < P[K].N; I++) {
+      for (const auto &K : P) {
+        for (int I = 0; I < K.N; I++) {
           uint64_t Bytes =
-              endian::byte_swap<uint64_t, llvm::endianness::little>(P[K].D[I]);
-          Data.replace(P[K].Pos + I * sizeof(uint64_t), sizeof(uint64_t),
+              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
+          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                        (const char *)&Bytes, sizeof(uint64_t));
         }
       }
@@ -575,7 +575,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         {MemProfSectionStart + sizeof(uint64_t), &FramePayloadOffset, 1},
         {MemProfSectionStart + 2 * sizeof(uint64_t), &FrameTableOffset, 1},
     };
-    OS.patch(PatchItems, 3);
+    OS.patch(PatchItems);
   }
 
   // BinaryIdSection has two parts:
@@ -693,7 +693,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
          (int)CSSummarySize}};
 
-    OS.patch(PatchItems, std::size(PatchItems));
+    OS.patch(PatchItems);
   } else {
     // Now do the final patch:
     PatchItem PatchItems[] = {
@@ -713,7 +713,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
          (int)CSSummarySize}};
 
-    OS.patch(PatchItems, std::size(PatchItems));
+    OS.patch(PatchItems);
   }
 
   for (const auto &I : FunctionData)
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index e686b97..7a383c3 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -2022,6 +2022,13 @@ APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const {
   return *this << ShAmt;
 }
 
+APInt APInt::sfloordiv_ov(const APInt &RHS, bool &Overflow) const {
+  APInt quotient = sdiv_ov(RHS, Overflow);
+  if ((quotient * RHS != *this) && (isNegative() != RHS.isNegative()))
+    return quotient - 1;
+  return quotient;
+}
+
 APInt APInt::sadd_sat(const APInt &RHS) const {
   bool Overflow;
   APInt Res = sadd_ov(RHS, Overflow);
@@ -3094,3 +3101,23 @@ void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
   }
 }
+
+APInt APIntOps::avgFloorS(const APInt &C1, const APInt &C2) {
+  // Return floor((C1 + C2) / 2)
+  return (C1 & C2) + (C1 ^ C2).ashr(1);
+}
+
+APInt APIntOps::avgFloorU(const APInt &C1, const APInt &C2) {
+  // Return floor((C1 + C2) / 2)
+  return (C1 & C2) + (C1 ^ C2).lshr(1);
+}
+
+APInt APIntOps::avgCeilS(const APInt &C1, const APInt &C2) {
+  // Return ceil((C1 + C2) / 2)
+  return (C1 | C2) - (C1 ^ C2).ashr(1);
+}
+
+APInt APIntOps::avgCeilU(const APInt &C1, const APInt &C2) {
+  // Return ceil((C1 + C2) / 2)
+  return (C1 | C2) - (C1 ^ C2).lshr(1);
+}
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 051dd2a..057f8eae 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -1344,7 +1344,7 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path_,
   SmallString<256> Path;
   Path_.toVector(Path);
 
-  if (makeCanonical(Path))
+  if (makeAbsolute(Path))
     return {};
 
   return ExternalFS->isLocal(Path, Result);
@@ -1411,7 +1411,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
   SmallString<256> Path;
   Dir.toVector(Path);
 
-  EC = makeCanonical(Path);
+  EC = makeAbsolute(Path);
   if (EC)
     return {};
 
@@ -2261,8 +2261,8 @@ void RedirectingFileSystem::LookupResult::getPath(
   llvm::sys::path::append(Result, E->getName());
 }
 
-std::error_code
-RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
+std::error_code RedirectingFileSystem::makeCanonicalForLookup(
+    SmallVectorImpl<char> &Path) const {
   if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
@@ -2277,12 +2277,16 @@ RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
 
 ErrorOr<RedirectingFileSystem::LookupResult>
 RedirectingFileSystem::lookupPath(StringRef Path) const {
+  llvm::SmallString<128> CanonicalPath(Path);
+  if (std::error_code EC = makeCanonicalForLookup(CanonicalPath))
+    return EC;
+
   // RedirectOnly means the VFS is always used.
   if (UsageTrackingActive && Redirection == RedirectKind::RedirectOnly)
     HasBeenUsed = true;
 
-  sys::path::const_iterator Start = sys::path::begin(Path);
-  sys::path::const_iterator End = sys::path::end(Path);
+  sys::path::const_iterator Start = sys::path::begin(CanonicalPath);
+  sys::path::const_iterator End = sys::path::end(CanonicalPath);
   llvm::SmallVector<Entry *, 32> Entries;
   for (const auto &Root : Roots) {
     ErrorOr<RedirectingFileSystem::LookupResult> Result =
@@ -2358,14 +2362,14 @@ static Status getRedirectedFileStatus(const Twine &OriginalPath,
 }
 
 ErrorOr<Status> RedirectingFileSystem::status(
-    const Twine &CanonicalPath, const Twine &OriginalPath,
+    const Twine &LookupPath, const Twine &OriginalPath,
     const RedirectingFileSystem::LookupResult &Result) {
   if (std::optional<StringRef> ExtRedirect = Result.getExternalRedirect()) {
-    SmallString<256> CanonicalRemappedPath((*ExtRedirect).str());
-    if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+    SmallString<256> RemappedPath((*ExtRedirect).str());
+    if (std::error_code EC = makeAbsolute(RemappedPath))
       return EC;
 
-    ErrorOr<Status> S = ExternalFS->status(CanonicalRemappedPath);
+    ErrorOr<Status> S = ExternalFS->status(RemappedPath);
     if (!S)
       return S;
     S = Status::copyWithNewName(*S, *ExtRedirect);
@@ -2375,13 +2379,13 @@ ErrorOr<Status> RedirectingFileSystem::status(
   }
 
   auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(Result.E);
-  return Status::copyWithNewName(DE->getStatus(), CanonicalPath);
+  return Status::copyWithNewName(DE->getStatus(), LookupPath);
 }
 
 ErrorOr<Status>
-RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
+RedirectingFileSystem::getExternalStatus(const Twine &LookupPath,
                                          const Twine &OriginalPath) const {
-  auto Result = ExternalFS->status(CanonicalPath);
+  auto Result = ExternalFS->status(LookupPath);
 
   // The path has been mapped by some nested VFS, don't override it with the
   // original path.
@@ -2391,38 +2395,37 @@ RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
 }
 
 ErrorOr<Status> RedirectingFileSystem::status(const Twine &OriginalPath) {
-  SmallString<256> CanonicalPath;
-  OriginalPath.toVector(CanonicalPath);
+  SmallString<256> Path;
+  OriginalPath.toVector(Path);
 
-  if (std::error_code EC = makeCanonical(CanonicalPath))
+  if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
   if (Redirection == RedirectKind::Fallback) {
     // Attempt to find the original file first, only falling back to the
     // mapped file if that fails.
-    ErrorOr<Status> S = getExternalStatus(CanonicalPath, OriginalPath);
+    ErrorOr<Status> S = getExternalStatus(Path, OriginalPath);
     if (S)
       return S;
   }
 
-  ErrorOr<RedirectingFileSystem::LookupResult> Result =
-      lookupPath(CanonicalPath);
+  ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
   if (!Result) {
     // Was not able to map file, fallthrough to using the original path if
     // that was the specified redirection type.
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(Result.getError()))
-      return getExternalStatus(CanonicalPath, OriginalPath);
+      return getExternalStatus(Path, OriginalPath);
     return Result.getError();
   }
 
-  ErrorOr<Status> S = status(CanonicalPath, OriginalPath, *Result);
+  ErrorOr<Status> S = status(Path, OriginalPath, *Result);
   if (!S && Redirection == RedirectKind::Fallthrough &&
       isFileNotFound(S.getError(), Result->E)) {
     // Mapped the file but it wasn't found in the underlying filesystem,
     // fallthrough to using the original path if that was the specified
     // redirection type.
-    return getExternalStatus(CanonicalPath, OriginalPath);
+    return getExternalStatus(Path, OriginalPath);
   }
 
   return S;
@@ -2471,30 +2474,27 @@ File::getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P) {
 
 ErrorOr<std::unique_ptr<File>>
 RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
-  SmallString<256> CanonicalPath;
-  OriginalPath.toVector(CanonicalPath);
+  SmallString<256> Path;
+  OriginalPath.toVector(Path);
 
-  if (std::error_code EC = makeCanonical(CanonicalPath))
+  if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
   if (Redirection == RedirectKind::Fallback) {
     // Attempt to find the original file first, only falling back to the
     // mapped file if that fails.
-    auto F = File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
-                               OriginalPath);
+    auto F = File::getWithPath(ExternalFS->openFileForRead(Path), OriginalPath);
     if (F)
       return F;
   }
 
-  ErrorOr<RedirectingFileSystem::LookupResult> Result =
-      lookupPath(CanonicalPath);
+  ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
   if (!Result) {
     // Was not able to map file, fallthrough to using the original path if
     // that was the specified redirection type.
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(Result.getError()))
-      return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
-                               OriginalPath);
+      return File::getWithPath(ExternalFS->openFileForRead(Path), OriginalPath);
     return Result.getError();
   }
 
@@ -2502,22 +2502,21 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
     return make_error_code(llvm::errc::invalid_argument);
 
   StringRef ExtRedirect = *Result->getExternalRedirect();
-  SmallString<256> CanonicalRemappedPath(ExtRedirect.str());
-  if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+  SmallString<256> RemappedPath(ExtRedirect.str());
+  if (std::error_code EC = makeAbsolute(RemappedPath))
     return EC;
 
   auto *RE = cast<RedirectingFileSystem::RemapEntry>(Result->E);
 
-  auto ExternalFile = File::getWithPath(
-      ExternalFS->openFileForRead(CanonicalRemappedPath), ExtRedirect);
+  auto ExternalFile =
+      File::getWithPath(ExternalFS->openFileForRead(RemappedPath), ExtRedirect);
   if (!ExternalFile) {
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(ExternalFile.getError(), Result->E)) {
       // Mapped the file but it wasn't found in the underlying filesystem,
       // fallthrough to using the original path if that was the specified
       // redirection type.
-      return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
-                               OriginalPath);
+      return File::getWithPath(ExternalFS->openFileForRead(Path), OriginalPath);
     }
     return ExternalFile;
   }
@@ -2537,28 +2536,27 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
 std::error_code
 RedirectingFileSystem::getRealPath(const Twine &OriginalPath,
                                    SmallVectorImpl<char> &Output) const {
-  SmallString<256> CanonicalPath;
-  OriginalPath.toVector(CanonicalPath);
+  SmallString<256> Path;
+  OriginalPath.toVector(Path);
 
-  if (std::error_code EC = makeCanonical(CanonicalPath))
+  if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
   if (Redirection == RedirectKind::Fallback) {
     // Attempt to find the original file first, only falling back to the
     // mapped file if that fails.
-    std::error_code EC = ExternalFS->getRealPath(CanonicalPath, Output);
+    std::error_code EC = ExternalFS->getRealPath(Path, Output);
     if (!EC)
       return EC;
   }
 
-  ErrorOr<RedirectingFileSystem::LookupResult> Result =
-      lookupPath(CanonicalPath);
+  ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
   if (!Result) {
     // Was not able to map file, fallthrough to using the original path if
     // that was the specified redirection type.
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(Result.getError()))
-      return ExternalFS->getRealPath(CanonicalPath, Output);
+      return ExternalFS->getRealPath(Path, Output);
     return Result.getError();
   }
 
@@ -2571,7 +2569,7 @@ RedirectingFileSystem::getRealPath(const Twine &OriginalPath,
       // Mapped the file but it wasn't found in the underlying filesystem,
       // fallthrough to using the original path if that was the specified
       // redirection type.
-      return ExternalFS->getRealPath(CanonicalPath, Output);
+      return ExternalFS->getRealPath(Path, Output);
     }
     return P;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b2c52b4..03f0778 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -987,7 +987,7 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
   // following operands:
   //
-  //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
+  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
   //
   // The pseudo is expanded into a conditional smstart/smstop, with a
   // check if pstate.sm (register) equals the expected value, and if not,
@@ -997,9 +997,9 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
   // streaming-compatible function:
   //
   // OrigBB:
-  //   MSRpstatePseudo 3, 0, %0, 0, <regmask>             <- Conditional SMSTOP
+  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
   //   bl @normal_callee
-  //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART
+  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
   //
   // ...which will be transformed into:
   //
@@ -1022,11 +1022,20 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
   // expected value for the callee (0 for a normal callee and 1 for a streaming
   // callee).
-  auto PStateSM = MI.getOperand(2).getReg();
+  unsigned Opc;
+  switch (MI.getOperand(2).getImm()) {
+  case AArch64SME::Always:
+    llvm_unreachable("Should have matched to instruction directly");
+  case AArch64SME::IfCallerIsStreaming:
+    Opc = AArch64::TBNZW;
+    break;
+  case AArch64SME::IfCallerIsNonStreaming:
+    Opc = AArch64::TBZW;
+    break;
+  }
+  auto PStateSM = MI.getOperand(3).getReg();
   auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
   unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
-  bool IsStreamingCallee = MI.getOperand(3).getImm();
-  unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
   MachineInstrBuilder Tbx =
       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9665ae5..7720e0a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5270,13 +5270,13 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
         AArch64ISD::SMSTART, DL, MVT::Other,
         Op->getOperand(0), // Chain
         DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+        DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
   case Intrinsic::aarch64_sme_za_disable:
     return DAG.getNode(
         AArch64ISD::SMSTOP, DL, MVT::Other,
         Op->getOperand(0), // Chain
         DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+        DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
   }
 }
 
@@ -7197,11 +7197,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
           getRegClassFor(PStateSM.getValueType().getSimpleVT()));
       FuncInfo->setPStateSMReg(Reg);
       Chain = DAG.getCopyToReg(Chain, DL, Reg, PStateSM);
-    } else {
-      PStateSM = DAG.getConstant(0, DL, MVT::i64);
-    }
-    Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue, PStateSM,
-                                /*Entry*/ true);
+      Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue,
+                                  AArch64SME::IfCallerIsNonStreaming, PStateSM);
+    } else
+      Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue,
+                                  AArch64SME::Always);
 
     // Ensure that the SMSTART happens after the CopyWithChain such that its
     // chain result is used.
@@ -7786,9 +7786,11 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
   }
 }
 
-SDValue AArch64TargetLowering::changeStreamingMode(
-    SelectionDAG &DAG, SDLoc DL, bool Enable,
-    SDValue Chain, SDValue InGlue, SDValue PStateSM, bool Entry) const {
+SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
+                                                   bool Enable, SDValue Chain,
+                                                   SDValue InGlue,
+                                                   unsigned Condition,
+                                                   SDValue PStateSM) const {
   MachineFunction &MF = DAG.getMachineFunction();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   FuncInfo->setHasStreamingModeChanges(true);
@@ -7797,10 +7799,13 @@ SDValue AArch64TargetLowering::changeStreamingMode(
   SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
   SDValue MSROp =
       DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32);
-
-  SDValue ExpectedSMVal =
-      DAG.getTargetConstant(Entry ? Enable : !Enable, DL, MVT::i64);
-  SmallVector<SDValue> Ops = {Chain, MSROp, PStateSM, ExpectedSMVal, RegMask};
+  SDValue ConditionOp = DAG.getTargetConstant(Condition, DL, MVT::i64);
+  SmallVector<SDValue> Ops = {Chain, MSROp, ConditionOp};
+  if (Condition != AArch64SME::Always) {
+    assert(PStateSM && "PStateSM should be defined");
+    Ops.push_back(PStateSM);
+  }
+  Ops.push_back(RegMask);
 
   if (InGlue)
     Ops.push_back(InGlue);
@@ -7809,6 +7814,19 @@ SDValue AArch64TargetLowering::changeStreamingMode(
   return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
 }
 
+static unsigned getSMCondition(const SMEAttrs &CallerAttrs,
+                               const SMEAttrs &CalleeAttrs) {
+  if (!CallerAttrs.hasStreamingCompatibleInterface() ||
+      CallerAttrs.hasStreamingBody())
+    return AArch64SME::Always;
+  if (CalleeAttrs.hasNonStreamingInterface())
+    return AArch64SME::IfCallerIsStreaming;
+  if (CalleeAttrs.hasStreamingInterface())
+    return AArch64SME::IfCallerIsNonStreaming;
+
+  llvm_unreachable("Unsupported attributes");
+}
+
 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
 /// and add input and output parameter nodes.
 SDValue
@@ -8028,7 +8046,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Chain = DAG.getNode(
         AArch64ISD::SMSTOP, DL, MVT::Other, Chain,
         DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+        DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
@@ -8299,9 +8317,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SDValue InGlue;
   if (RequiresSMChange) {
-    SDValue NewChain =
-        changeStreamingMode(DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain,
-                            InGlue, PStateSM, true);
+    SDValue NewChain = changeStreamingMode(
+        DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
+        getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
     Chain = NewChain.getValue(0);
     InGlue = NewChain.getValue(1);
   }
@@ -8455,8 +8473,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   if (RequiresSMChange) {
     assert(PStateSM && "Expected a PStateSM to be set");
-    Result = changeStreamingMode(DAG, DL, !CalleeAttrs.hasStreamingInterface(),
-                                 Result, InGlue, PStateSM, false);
+    Result = changeStreamingMode(
+        DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
+        getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
   }
 
   if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
@@ -8464,7 +8483,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Result = DAG.getNode(
         AArch64ISD::SMSTART, DL, MVT::Other, Result,
         DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+        DAG.getConstant(AArch64SME::Always, DL, MVT::i64));
 
   if (ShouldPreserveZT0)
     Result =
@@ -8599,13 +8618,12 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       Register Reg = FuncInfo->getPStateSMReg();
       assert(Reg.isValid() && "PStateSM Register is invalid");
       SDValue PStateSM = DAG.getCopyFromReg(Chain, DL, Reg, MVT::i64);
-      Chain =
-          changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
-                              /*Glue*/ SDValue(), PStateSM, /*Entry*/ false);
+      Chain = changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
+                                  /*Glue*/ SDValue(),
+                                  AArch64SME::IfCallerIsNonStreaming, PStateSM);
     } else
-      Chain = changeStreamingMode(
-          DAG, DL, /*Enable*/ false, Chain,
-          /*Glue*/ SDValue(), DAG.getConstant(1, DL, MVT::i64), /*Entry*/ true);
+      Chain = changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
+                                  /*Glue*/ SDValue(), AArch64SME::Always);
     Glue = Chain.getValue(1);
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 68341c19..89016cb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -968,12 +968,12 @@ public:
   bool shouldExpandCttzElements(EVT VT) const override;
 
   /// If a change in streaming mode is required on entry to/return from a
-  /// function call it emits and returns the corresponding SMSTART or SMSTOP node.
-  /// \p Entry tells whether this is before/after the Call, which is necessary
-  /// because PSTATE.SM is only queried once.
+  /// function call it emits and returns the corresponding SMSTART or SMSTOP
+  /// node. \p Condition should be one of the enum values from
+  /// AArch64SME::ToggleCondition.
   SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
-                              SDValue Chain, SDValue InGlue,
-                              SDValue PStateSM, bool Entry) const;
+                              SDValue Chain, SDValue InGlue, unsigned Condition,
+                              SDValue PStateSM = SDValue()) const;
 
   bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
 
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ec59778..ad29003 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1072,3 +1072,8 @@ bool AArch64RegisterInfo::shouldCoalesce(
 
   return true;
 }
+
+bool AArch64RegisterInfo::shouldAnalyzePhysregInMachineLoopInfo(
+    MCRegister R) const {
+  return R == AArch64::VG;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index e93352c..5c8a5e0 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -145,6 +145,8 @@ public:
 
   void getOffsetOpcodes(const StackOffset &Offset,
                         SmallVectorImpl<uint64_t> &Ops) const override;
+
+  bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 2907ba7..1554f1c 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -10,12 +10,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-def AArch64_smstart : SDNode<"AArch64ISD::SMSTART", SDTypeProfile<0, 3,
-                             [SDTCisInt<0>, SDTCisInt<0>, SDTCisInt<0>]>,
+def AArch64_smstart : SDNode<"AArch64ISD::SMSTART", SDTypeProfile<0, 2,
+                             [SDTCisInt<0>, SDTCisInt<0>]>,
                              [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
                               SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64_smstop  : SDNode<"AArch64ISD::SMSTOP", SDTypeProfile<0, 3,
-                             [SDTCisInt<0>, SDTCisInt<0>, SDTCisInt<0>]>,
+def AArch64_smstop  : SDNode<"AArch64ISD::SMSTOP", SDTypeProfile<0, 2,
+                             [SDTCisInt<0>, SDTCisInt<0>]>,
                              [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
                               SDNPOptInGlue, SDNPOutGlue]>;
 def AArch64_restore_za : SDNode<"AArch64ISD::RESTORE_ZA", SDTypeProfile<0, 3,
@@ -158,34 +158,6 @@ def : Pat<(AArch64_restore_za
             (i64 GPR64:$tpidr2_el0), (i64 GPR64sp:$tpidr2obj), (i64 texternalsym:$restore_routine)),
           (RestoreZAPseudo GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, texternalsym:$restore_routine)>;
 
-// Scenario A:
-//
-//   %pstate.before.call = 1
-//   if (%pstate.before.call != 0)
-//     smstop (pstate_za|pstate_sm)
-//   call fn()
-//   if (%pstate.before.call != 0)
-//     smstart (pstate_za|pstate_sm)
-//
-def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 1), (i64 0)),   // before call
-          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>;
-def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 1), (i64 0)),  // after call
-          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b1)>;
-
-// Scenario B:
-//
-//   %pstate.before.call = 0
-//   if (%pstate.before.call != 1)
-//     smstart (pstate_za|pstate_sm)
-//   call fn()
-//   if (%pstate.before.call != 1)
-//     smstop (pstate_za|pstate_sm)
-//
-def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 0), (i64 1)),  // before call
-          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b1)>;
-def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 0), (i64 1)),   // after call
-          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>;
-
 // Read and write TPIDR2_EL0
 def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
           (MSR 0xde85, GPR64:$val)>;
@@ -230,17 +202,24 @@ defm COALESCER_BARRIER : CoalescerBarriers;
 // SME instructions.
 def MSRpstatePseudo :
   Pseudo<(outs),
-           (ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>,
+           (ins svcr_op:$pstatefield, timm0_1:$imm, timm0_31:$condition, variable_ops), []>,
     Sched<[WriteSys]> {
   let hasPostISelHook = 1;
   let Uses = [VG];
   let Defs = [VG];
 }
 
-def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),
-          (MSRpstatePseudo svcr_op:$pstate, 0b1, GPR64:$rtpstate, timm0_1:$expected_pstate)>;
-def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),
-          (MSRpstatePseudo svcr_op:$pstate, 0b0, GPR64:$rtpstate, timm0_1:$expected_pstate)>;
+def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 timm0_31:$condition)),
+          (MSRpstatePseudo svcr_op:$pstate, 0b1, timm0_31:$condition)>;
+def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 timm0_31:$condition)),
+          (MSRpstatePseudo svcr_op:$pstate, 0b0, timm0_31:$condition)>;
+
+// Unconditional start/stop
+def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 /*AArch64SME::Always*/0)),
+          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b1)>;
+def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 /*AArch64SME::Always*/0)),
+          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>;
+
 
 //===----------------------------------------------------------------------===//
 // SME2 Instructions
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index ef7c517..f2812d2 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -533,7 +533,9 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
     if (Info.AI->hasName())
       TagPCall->setName(Info.AI->getName() + ".tag");
     // Does not replace metadata, so we don't have to handle DPValues.
-    Info.AI->replaceNonMetadataUsesWith(TagPCall);
+    Info.AI->replaceUsesWithIf(TagPCall, [&](const Use &U) {
+      return !memtag::isLifetimeIntrinsic(U.getUser());
+    });
     TagPCall->setOperand(0, Info.AI);
 
     // Calls to functions that may return twice (e.g. setjmp) confuse the
@@ -550,7 +552,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
       uint64_t Size =
           cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
       Size = alignTo(Size, kTagGranuleSize);
-      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
+      tagAlloca(AI, Start->getNextNode(), TagPCall, Size);
 
       auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
       if (!DT || !PDT ||
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c19e02b..8267502 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5160,9 +5160,9 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
             (cmp $Op1, $Op2, $Op3)>;
   def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
             (cmp $Op1, $Op3, $Op2)>;
-  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
+  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (SVEAllActive)), intvt:$Op2, intvt:$Op3, cc))),
             (cmp $Pg, $Op2, $Op3)>;
-  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
+  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (SVEAllActive)), intvt:$Op2, intvt:$Op3, invcc))),
             (cmp $Pg, $Op3, $Op2)>;
 }
 
@@ -5172,9 +5172,9 @@ multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt
             (cmp $Op1, $Op2)>;
   def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
             (cmp $Op1, $Op2)>;
-  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
+  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (SVEAllActive)), intvt:$Op1, (SVEDup0), cc))),
             (cmp $Pg, $Op1)>;
-  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
+  def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (SVEAllActive)), (SVEDup0), intvt:$Op1, invcc))),
             (cmp $Pg, $Op1)>;
 }
 
@@ -5258,13 +5258,13 @@ multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
                                     commuted_cc)),
             (cmp $Pg, $Zs1, immtype:$imm)>;
   def : Pat<(predvt (and predvt:$Pg,
-                         (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
+                         (AArch64setcc_z_oneuse (predvt (SVEAllActive)),
                                          (intvt ZPR:$Zs1),
                                          (intvt (splat_vector (immtype:$imm))),
                                          cc))),
             (cmp $Pg, $Zs1, immtype:$imm)>;
   def : Pat<(predvt (and predvt:$Pg,
-                         (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
+                         (AArch64setcc_z_oneuse (predvt (SVEAllActive)),
                                          (intvt (splat_vector (immtype:$imm))),
                                          (intvt ZPR:$Zs1),
                                          commuted_cc))),
@@ -5743,23 +5743,23 @@ multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperat
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
 
   // mul(step_vector(1), dup(Y)) -> index(0, Y).
-  def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv16i1 (SVEAllActive)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv8i1 (SVEAllActive)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv4i1 (SVEAllActive)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),
+  def : Pat<(mulop (nxv2i1 (SVEAllActive)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),
             (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
 
   // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
-  def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv16i1 (SVEAllActive)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv8i1 (SVEAllActive)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv4i1 (SVEAllActive)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv2i1 (SVEAllActive)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
 }
 
@@ -5837,13 +5837,13 @@ multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
             (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
 
   // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
-  def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv16i1 (SVEAllActive)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),(nxv8i16 (splat_vector(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv8i1 (SVEAllActive)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),(nxv8i16 (splat_vector(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),(nxv4i32 (splat_vector(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv4i1 (SVEAllActive)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),(nxv4i32 (splat_vector(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),(nxv2i64 (splat_vector(i64 GPR64:$Rn)))),
+  def : Pat<(add (mulop (nxv2i1 (SVEAllActive)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),(nxv2i64 (splat_vector(i64 GPR64:$Rn)))),
             (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
 }
 
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index ed8336a..f821bb5 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -591,6 +591,14 @@ namespace AArch64BTIHint {
   #include "AArch64GenSystemOperands.inc"
 }
 
+namespace AArch64SME {
+enum ToggleCondition : unsigned {
+  Always,
+  IfCallerIsStreaming,
+  IfCallerIsNonStreaming
+};
+}
+
 namespace AArch64SE {
     enum ShiftExtSpecifiers {
         Invalid = -1,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8a71550..bee43b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -582,6 +582,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
 
   setMaxAtomicSizeInBitsSupported(64);
   setMaxDivRemBitWidthSupported(64);
+  setMaxLargeFPConvertBitWidthSupported(64);
 }
 
 bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index fb829fa..5b7fa13 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -767,19 +767,21 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // Checking for NaN before canonicalization provides better fidelity when
     // mapping other operations onto fmed3 since the order of operands is
     // unchanged.
-    CallInst *NewCall = nullptr;
+    Value *V = nullptr;
     if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
-      NewCall = IC.Builder.CreateMinNum(Src1, Src2);
+      V = IC.Builder.CreateMinNum(Src1, Src2);
     } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
-      NewCall = IC.Builder.CreateMinNum(Src0, Src2);
+      V = IC.Builder.CreateMinNum(Src0, Src2);
     } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
-      NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
+      V = IC.Builder.CreateMaxNum(Src0, Src1);
     }
 
-    if (NewCall) {
-      NewCall->copyFastMathFlags(&II);
-      NewCall->takeName(&II);
-      return IC.replaceInstUsesWith(II, NewCall);
+    if (V) {
+      if (auto *CI = dyn_cast<CallInst>(V)) {
+        CI->copyFastMathFlags(&II);
+        CI->takeName(&II);
+      }
+      return IC.replaceInstUsesWith(II, V);
     }
 
     bool Swap = false;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 027dd0f..5297054 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDKernelCodeT.h"
+#include "MCTargetDesc/AMDGPUMCExpr.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
@@ -1816,6 +1817,7 @@ private:
 
 public:
   void onBeginOfFile() override;
+  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
 
   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
 
@@ -8330,6 +8332,59 @@ void AMDGPUAsmParser::onBeginOfFile() {
     getTargetStreamer().EmitDirectiveAMDGCNTarget();
 }
 
+/// Parse AMDGPU specific expressions.
+///
+///  expr ::= or(expr, ...) |
+///           max(expr, ...)
+///
+bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
+
+  if (isToken(AsmToken::Identifier)) {
+    StringRef TokenId = getTokenStr();
+    AGVK VK = StringSwitch<AGVK>(TokenId)
+                  .Case("max", AGVK::AGVK_Max)
+                  .Case("or", AGVK::AGVK_Or)
+                  .Default(AGVK::AGVK_None);
+
+    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
+      SmallVector<const MCExpr *, 4> Exprs;
+      uint64_t CommaCount = 0;
+      lex(); // Eat 'max'/'or'
+      lex(); // Eat '('
+      while (true) {
+        if (trySkipToken(AsmToken::RParen)) {
+          if (Exprs.empty()) {
+            Error(getToken().getLoc(),
+                  "empty " + Twine(TokenId) + " expression");
+            return true;
+          }
+          if (CommaCount + 1 != Exprs.size()) {
+            Error(getToken().getLoc(),
+                  "mismatch of commas in " + Twine(TokenId) + " expression");
+            return true;
+          }
+          Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
+          return false;
+        }
+        const MCExpr *Expr;
+        if (getParser().parseExpression(Expr, EndLoc))
+          return true;
+        Exprs.push_back(Expr);
+        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
+        if (LastTokenWasComma)
+          CommaCount++;
+        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
+          Error(getToken().getLoc(),
+                "unexpected token in " + Twine(TokenId) + " expression");
+          return true;
+        }
+      }
+    }
+  }
+  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
+}
+
 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
   StringRef Name = getTokenStr();
   if (Name == "mul") {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index f42d4ae..cc03ee6 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -115,8 +115,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
   bits<1> lds = ps.lds; // LDS DMA for global and scratch
 
   // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
-  bits<2> seg = !if(ps.is_flat_global, 0b10,
-                  !if(ps.is_flat_scratch, 0b01, 0));
+  bits<2> seg = {ps.is_flat_global, ps.is_flat_scratch};
 
   // Signed offset. Highest bit ignored for flat and treated as 12-bit
   // unsigned for flat accesses.
@@ -174,7 +173,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
   bits<8> vaddr;
   bits<24> offset;
 
-  let Inst{6-0} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
+  let Inst{6-0} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index);
   let Inst{21-14} = op;
   let Inst{31-26} = 0x3b;
   let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
@@ -1787,45 +1786,46 @@ def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
 def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
 def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
 
-multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
+multiclass FLAT_Real_Atomics_ci <bits<7> op> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
   def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
 }
 
-defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
-defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
-defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
-defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
-defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
-defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
-defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
-defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
-defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
-defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
-defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
-defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
-defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
-defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
-defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
-defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
-defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
-defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
-defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
-defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
-defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
-defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
-defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
-defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
-defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
-defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
+defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30>;
+defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31>;
+defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32>;
+defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33>;
+defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35>;
+defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36>;
+defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37>;
+defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38>;
+defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39>;
+defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a>;
+defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b>;
+defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c>;
+defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d>;
+defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50>;
+defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51>;
+defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52>;
+defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53>;
+defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55>;
+defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56>;
+defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57>;
+defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58>;
+defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59>;
+defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a>;
+defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b>;
+defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c>;
+defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d>;
 
 // CI Only flat instructions
-defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
-defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
-defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
-defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
-defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
-defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
+defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e>;
+defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f>;
+defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40>;
+defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e>;
+defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f>;
+defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60>;
 
 
 //===----------------------------------------------------------------------===//
@@ -1925,8 +1925,9 @@ def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
 def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
 def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
 
-multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
+multiclass FLAT_Real_Atomics_vi <bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
   def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
 }
@@ -1939,32 +1940,32 @@ multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
 }
 
 
-defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
-defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
-defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
-defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
-defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
-defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
-defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
-defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
-defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
-defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
-defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
-defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
-defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
-defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
-defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
-defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
-defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
-defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
-defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
-defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
-defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
-defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
-defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
-defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
-defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
-defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
+defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40>;
+defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41>;
+defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42>;
+defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43>;
+defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44>;
+defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45>;
+defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46>;
+defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47>;
+defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48>;
+defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49>;
+defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a>;
+defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b>;
+defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c>;
+defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61>;
+defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62>;
+defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63>;
+defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64>;
+defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65>;
+defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66>;
+defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67>;
+defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68>;
+defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69>;
+defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a>;
+defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b>;
+defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c>;
 
 defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
 defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
@@ -2060,9 +2061,9 @@ let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
 }
 
 let SubtargetPredicate = isGFX90AOnly in {
-  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
-  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
-  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
+  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, 0>;
+  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, 0>;
+  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, 0>;
   defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
   defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
   defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
@@ -2073,7 +2074,8 @@ multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
   def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
 }
 
-multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
+multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
   def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
 }
@@ -2089,15 +2091,15 @@ let SubtargetPredicate = isGFX940Plus in {
   defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
   defm GLOBAL_ATOMIC_PK_ADD_F16  : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
 
-  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
-  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
-  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
+  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f>;
+  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50>;
+  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51>;
   defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
   defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
   defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
-  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
-  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
-  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
+  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d>;
+  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e>;
+  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52>;
   defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
 } // End SubtargetPredicate = isGFX940Plus
 
@@ -2112,7 +2114,9 @@ class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
 
   let Inst{11-0}  = offset{11-0};
   let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
-  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
+  let Inst{54-48} = !cond(ps.enabled_saddr : saddr,
+                          !and(ps.is_flat_scratch, !not(ps.has_vaddr)) : EXEC_HI.Index{6-0}, // ST mode
+                          true : SGPR_NULL_gfxpre11.Index{6-0});
   let Inst{55}    = 0;
 }
 
@@ -2140,7 +2144,6 @@ multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
 multiclass FLAT_Real_ST_gfx10<bits<7> op> {
   def _ST_gfx10 :
     FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
-      let Inst{54-48} = EXEC_HI.Index;
       let OtherPredicates = [HasFlatScratchSTMode];
     }
 }
@@ -2350,6 +2353,7 @@ class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
   let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
   let Inst{15}    = cpol{CPolBit.SLC};
   let Inst{17-16} = seg;
+  let Inst{54-48} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index);
   let Inst{55}    = ps.sve;
 }
 
@@ -2360,15 +2364,11 @@ multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
 
 multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
   FLAT_Aliases_gfx11<ps, opName, renamed> {
-  def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
-    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
-  }
+  def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName>;
 }
 
 multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
-  def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
-    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
-  }
+  def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName>;
 }
 
 multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
@@ -2381,7 +2381,6 @@ multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
 
 multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
   def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
-    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
     let OtherPredicates = [HasFlatScratchSTMode];
   }
 }
@@ -2562,8 +2561,7 @@ class VFLAT_Real_gfx12 <bits<8> op, FLAT_Pseudo ps,
   let AssemblerPredicate = isGFX12Plus;
   let DecoderNamespace = "GFX12";
 
-  let Inst{25-24} = !if(ps.is_flat_scratch, 0b01,
-                        !if(ps.is_flat_global, 0b10, 0b00));
+  let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch};
 }
 
 multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> {
@@ -2576,15 +2574,11 @@ multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string ali
 multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps = NAME, string opName = !tolower(NAME),
                                  int renamed = false, string alias = ""> :
   VFLAT_Aliases_gfx12<ps, opName, renamed, alias> {
-  def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName> {
-    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
-  }
+  def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName>;
 }
 
 multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> {
-  def _RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
-    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
-  }
+  def _RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName>;
 }
 
 multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> {
@@ -2597,7 +2591,6 @@ multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> {
 
 multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> {
   def _ST_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
-    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
     let OtherPredicates = [HasFlatScratchSTMode];
   }
 }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
new file mode 100644
index 0000000..4578c33
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -0,0 +1,115 @@
+//===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+using namespace llvm;
+
+AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind,
+                                           ArrayRef<const MCExpr *> Args,
+                                           MCContext &Ctx)
+    : Kind(Kind), Ctx(Ctx) {
+  assert(Args.size() >= 1 && "Needs a minimum of one expression.");
+  assert(Kind != AGVK_None &&
+         "Cannot construct AMDGPUVariadicMCExpr of kind none.");
+
+  // Allocating the variadic arguments through the same allocation mechanism
+  // that the object itself is allocated with so they end up in the same memory.
+  //
+  // Will result in an asan failure if allocated on the heap through standard
+  // allocation (e.g., through SmallVector's grow).
+  RawArgs = static_cast<const MCExpr **>(
+      Ctx.allocate(sizeof(const MCExpr *) * Args.size()));
+  std::uninitialized_copy(Args.begin(), Args.end(), RawArgs);
+  this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size());
+}
+
+AMDGPUVariadicMCExpr::~AMDGPUVariadicMCExpr() { Ctx.deallocate(RawArgs); }
+
+const AMDGPUVariadicMCExpr *
+AMDGPUVariadicMCExpr::create(VariadicKind Kind, ArrayRef<const MCExpr *> Args,
+                             MCContext &Ctx) {
+  return new (Ctx) AMDGPUVariadicMCExpr(Kind, Args, Ctx);
+}
+
+const MCExpr *AMDGPUVariadicMCExpr::getSubExpr(size_t Index) const {
+  assert(Index < Args.size() &&
+         "Indexing out of bounds AMDGPUVariadicMCExpr sub-expr");
+  return Args[Index];
+}
+
+void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
+                                     const MCAsmInfo *MAI) const {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown AMDGPUVariadicMCExpr kind.");
+  case AGVK_Or:
+    OS << "or(";
+    break;
+  case AGVK_Max:
+    OS << "max(";
+    break;
+  }
+  for (auto It = Args.begin(); It != Args.end(); ++It) {
+    (*It)->print(OS, MAI, /*InParens=*/false);
+    if ((It + 1) != Args.end())
+      OS << ", ";
+  }
+  OS << ')';
+}
+
+static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
+                  int64_t Arg2) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown AMDGPUVariadicMCExpr kind.");
+  case AMDGPUVariadicMCExpr::AGVK_Max:
+    return std::max(Arg1, Arg2);
+  case AMDGPUVariadicMCExpr::AGVK_Or:
+    return Arg1 | Arg2;
+  }
+}
+
+bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl(
+    MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
+  std::optional<int64_t> Total;
+
+  for (const MCExpr *Arg : Args) {
+    MCValue ArgRes;
+    if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) ||
+        !ArgRes.isAbsolute())
+      return false;
+
+    if (!Total.has_value())
+      Total = ArgRes.getConstant();
+    Total = op(Kind, *Total, ArgRes.getConstant());
+  }
+
+  Res = MCValue::get(*Total);
+  return true;
+}
+
+void AMDGPUVariadicMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+  for (const MCExpr *Arg : Args)
+    Streamer.visitUsedExpr(*Arg);
+}
+
+MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
+  for (const MCExpr *Arg : Args) {
+    if (Arg->findAssociatedFragment())
+      return Arg->findAssociatedFragment();
+  }
+  return nullptr;
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
new file mode 100644
index 0000000..238e0de
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
@@ -0,0 +1,72 @@
+//===- AMDGPUMCExpr.h - AMDGPU specific MC expression classes ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+/// AMDGPU target specific variadic MCExpr operations.
+///
+/// Takes in a minimum of 1 argument to be used with an operation. The supported
+/// operations are:
+///   - (bitwise) or
+///   - max
+///
+/// \note If the 'or'/'max' operations are provided only a single argument, the
+/// operation will act as a no-op and simply resolve as the provided argument.
+///
+class AMDGPUVariadicMCExpr : public MCTargetExpr {
+public:
+  enum VariadicKind { AGVK_None, AGVK_Or, AGVK_Max };
+
+private:
+  VariadicKind Kind;
+  MCContext &Ctx;
+  const MCExpr **RawArgs;
+  ArrayRef<const MCExpr *> Args;
+
+  AMDGPUVariadicMCExpr(VariadicKind Kind, ArrayRef<const MCExpr *> Args,
+                       MCContext &Ctx);
+  ~AMDGPUVariadicMCExpr();
+
+public:
+  static const AMDGPUVariadicMCExpr *
+  create(VariadicKind Kind, ArrayRef<const MCExpr *> Args, MCContext &Ctx);
+
+  static const AMDGPUVariadicMCExpr *createOr(ArrayRef<const MCExpr *> Args,
+                                              MCContext &Ctx) {
+    return create(VariadicKind::AGVK_Or, Args, Ctx);
+  }
+
+  static const AMDGPUVariadicMCExpr *createMax(ArrayRef<const MCExpr *> Args,
+                                               MCContext &Ctx) {
+    return create(VariadicKind::AGVK_Max, Args, Ctx);
+  }
+
+  VariadicKind getKind() const { return Kind; }
+  const MCExpr *getSubExpr(size_t Index) const;
+
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+  bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+                                 const MCFixup *Fixup) const override;
+  void visitUsedExpr(MCStreamer &Streamer) const override;
+  MCFragment *findAssociatedFragment() const override;
+  void fixELFSymbolsInTLSFixups(MCAssembler &) const override{};
+
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Target;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 5dc7607..0842a58 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
   AMDGPUInstPrinter.cpp
   AMDGPUMCAsmInfo.cpp
   AMDGPUMCCodeEmitter.cpp
+  AMDGPUMCExpr.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUTargetStreamer.cpp
   R600InstPrinter.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 0b516bf..1f0207d 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -460,7 +460,8 @@ enum Id { // Message ID, width(4) [3:0].
   ID_RTN_GET_REALTIME = 131,
   ID_RTN_SAVE_WAVE = 132,
   ID_RTN_GET_TBA = 133,
-  ID_RTN_GET_SE_AID_ID = 134,
+  ID_RTN_GET_TBA_TO_PC = 134,
+  ID_RTN_GET_SE_AID_ID = 135,
 
   ID_MASK_PreGFX11_ = 0xF,
   ID_MASK_GFX11Plus_ = 0xFF
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index a6184c5..2762190 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -485,6 +485,16 @@ public:
   virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const = 0;
 
   virtual ~WaitcntGenerator() = default;
+
+  // Create a mask value from the initializer list of wait event types.
+  static constexpr unsigned
+  eventMask(std::initializer_list<WaitEventType> Events) {
+    unsigned Mask = 0;
+    for (auto &E : Events)
+      Mask |= 1 << E;
+
+    return Mask;
+  }
 };
 
 class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
@@ -506,14 +516,12 @@ public:
     assert(ST);
 
     static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = {
-        (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS) |
-            (1 << VMEM_SAMPLER_READ_ACCESS) | (1 << VMEM_BVH_READ_ACCESS),
-        (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
-            (1 << SQ_MESSAGE),
-        (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
-            (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) |
-            (1 << EXP_LDS_ACCESS),
-        (1 << VMEM_WRITE_ACCESS) | (1 << SCRATCH_WRITE_ACCESS),
+        eventMask({VMEM_ACCESS, VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS,
+                   VMEM_BVH_READ_ACCESS}),
+        eventMask({SMEM_ACCESS, LDS_ACCESS, GDS_ACCESS, SQ_MESSAGE}),
+        eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
+                   EXP_POS_ACCESS, EXP_LDS_ACCESS}),
+        eventMask({VMEM_WRITE_ACCESS, SCRATCH_WRITE_ACCESS}),
         0,
         0,
         0};
@@ -543,15 +551,14 @@ public:
     assert(ST);
 
     static const unsigned WaitEventMaskForInstGFX12Plus[NUM_INST_CNTS] = {
-        (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS),
-        (1 << LDS_ACCESS) | (1 << GDS_ACCESS),
-        (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
-            (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) |
-            (1 << EXP_LDS_ACCESS),
-        (1 << VMEM_WRITE_ACCESS) | (1 << SCRATCH_WRITE_ACCESS),
-        (1 << VMEM_SAMPLER_READ_ACCESS),
-        (1 << VMEM_BVH_READ_ACCESS),
-        (1 << SMEM_ACCESS) | (1 << SQ_MESSAGE)};
+        eventMask({VMEM_ACCESS, VMEM_READ_ACCESS}),
+        eventMask({LDS_ACCESS, GDS_ACCESS}),
+        eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
+                   EXP_POS_ACCESS, EXP_LDS_ACCESS}),
+        eventMask({VMEM_WRITE_ACCESS, SCRATCH_WRITE_ACCESS}),
+        eventMask({VMEM_SAMPLER_READ_ACCESS}),
+        eventMask({VMEM_BVH_READ_ACCESS}),
+        eventMask({SMEM_ACCESS, SQ_MESSAGE})};
 
     return WaitEventMaskForInstGFX12Plus;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 835a5a2..1694436 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -835,6 +835,19 @@ def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
   return fp16SrcZerosHighBits(N->getOpcode());
 }]>;
 
+def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
+  const SITargetLowering &Lowering =
+      *static_cast<const SITargetLowering *>(getTargetLowering());
+  return Lowering.isCanonicalized(*CurDAG, SDValue(N, 0));
+}]> {
+  let GISelPredicateCode = [{
+    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
+        MF.getSubtarget().getTargetLowering());
+    const MachineOperand &Dst = MI.getOperand(0);
+    assert(Dst.isDef());
+    return TLI->isCanonicalized(Dst.getReg(), MF);
+   }];
+}
 
 //===----------------------------------------------------------------------===//
 // MUBUF/SMEM Patterns
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3ab7884..1c942dc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2946,30 +2946,12 @@ def : GCNPat<
 
 // If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
 let AddedComplexity = 1000 in {
-def : GCNPat<
-  (is_canonicalized_1<fcanonicalize> f16:$src),
-  (COPY f16:$src)
->;
-
-def : GCNPat<
-  (is_canonicalized_1<fcanonicalize> v2f16:$src),
-  (COPY v2f16:$src)
->;
-
-def : GCNPat<
-  (is_canonicalized_1<fcanonicalize> f32:$src),
-  (COPY f32:$src)
->;
-
-def : GCNPat<
-  (is_canonicalized_1<fcanonicalize> v2f32:$src),
-  (COPY v2f32:$src)
->;
-
-def : GCNPat<
-  (is_canonicalized_1<fcanonicalize> f64:$src),
-  (COPY f64:$src)
->;
+foreach vt = [f16, v2f16, f32, v2f32, f64] in {
+  def : GCNPat<
+    (fcanonicalize (vt is_canonicalized:$src)),
+    (COPY vt:$src)
+  >;
+}
 }
 
 // Prefer selecting to max when legal, but using mul is always valid.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 5c64c6b..5cd10aa 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2265,10 +2265,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
           TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
           *MI->memoperands_begin(), RS);
-      
-      if (IsWWMRegSpill) 
+
+      if (IsWWMRegSpill)
         TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
-  
+
       MI->eraseFromParent();
       return true;
     }
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index bccdf4b..1159c4e 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -744,19 +744,19 @@ def S_NOR_B64 : SOP2_64 <"s_nor_b64",
 
 // There are also separate patterns for types other than i32
 def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32",
-  [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+  [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (not i32:$src1)))]
 >;
 
 def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64",
-  [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+  [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, (not i64:$src1)))]
 >;
 
 def S_ORN2_B32 : SOP2_32 <"s_orn2_b32",
-  [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+  [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, (not i32:$src1)))]
 >;
 
 def S_ORN2_B64 : SOP2_64 <"s_orn2_b64",
-  [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+  [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, (not i64:$src1)))]
 >;
 } // End Defs = [SCC]
 
@@ -1905,21 +1905,20 @@ def : GCNPat<
   (S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src)
 >;
 
-// FIXME: ValueType should have isVector field
 class ScalarNot2Pat<Instruction inst, SDPatternOperator op, ValueType vt,
-                    bit isVector = 1> : GCNPat<
-  (UniformBinFrag<op> vt:$src0, (UniformUnaryFrag<!if(isVector, vnot, not)> vt:$src1)),
+                    SDPatternOperator notnode = !if(vt.isVector, vnot, not)> : GCNPat<
+  (UniformBinFrag<op> vt:$src0, (notnode vt:$src1)),
   (inst getSOPSrcForVT<vt>.ret:$src0, getSOPSrcForVT<vt>.ret:$src1)
 >;
 
 // Match these for some more types
 // TODO: i1
-def : ScalarNot2Pat<S_ANDN2_B32, and, i16, 0>;
+def : ScalarNot2Pat<S_ANDN2_B32, and, i16>;
 def : ScalarNot2Pat<S_ANDN2_B32, and, v2i16>;
 def : ScalarNot2Pat<S_ANDN2_B64, and, v4i16>;
 def : ScalarNot2Pat<S_ANDN2_B64, and, v2i32>;
 
-def : ScalarNot2Pat<S_ORN2_B32, or, i16, 0>;
+def : ScalarNot2Pat<S_ORN2_B32, or, i16>;
 def : ScalarNot2Pat<S_ORN2_B32, or, v2i16>;
 def : ScalarNot2Pat<S_ORN2_B64, or, v4i16>;
 def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 23434d2..d468b14 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -60,6 +60,7 @@ const CustomOperand<const MCSubtargetInfo &> Msg[] = {
   {{"MSG_RTN_GET_REALTIME"},    ID_RTN_GET_REALTIME,        isGFX11Plus},
   {{"MSG_RTN_SAVE_WAVE"},       ID_RTN_SAVE_WAVE,           isGFX11Plus},
   {{"MSG_RTN_GET_TBA"},         ID_RTN_GET_TBA,             isGFX11Plus},
+  {{"MSG_RTN_GET_TBA_TO_PC"},   ID_RTN_GET_TBA_TO_PC,       isGFX11Plus},
   {{"MSG_RTN_GET_SE_AID_ID"},   ID_RTN_GET_SE_AID_ID,       isGFX12Plus},
 };
 // clang-format on
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 68e14f7..fe0d3b6 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1387,7 +1387,7 @@ let mayLoad = 1, hasSideEffects = 0,
 
 // Load indirect with displacement operations.
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-  let Constraints = "@earlyclobber $reg" in def LDDRdPtrQ
+  def LDDRdPtrQ
       : FSTDLDD<0,
                 (outs GPR8
                  : $reg),
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 3145bc3..1688355 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -232,6 +232,7 @@ public:
         .Case("callx", true)
         .Case("goto", true)
         .Case("gotol", true)
+        .Case("may_goto", true)
         .Case("*", true)
         .Case("exit", true)
         .Case("lock", true)
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index edd59aa..2b78ed7 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -15,7 +15,7 @@
 //   - remove llvm.bpf.getelementptr.and.load builtins.
 //   - remove llvm.bpf.getelementptr.and.store builtins.
 //   - for loads and stores with base addresses from non-zero address space
-//     cast base address to zero address space (support for BPF arenas).
+//     cast base address to zero address space (support for BPF address spaces).
 //
 //===----------------------------------------------------------------------===//
 
@@ -482,7 +482,7 @@ static void aspaceWrapOperand(DenseMap<Value *, Value *> &Cache, Instruction *I,
   }
 }
 
-// Support for BPF arenas:
+// Support for BPF address spaces:
 // - for each function in the module M, update pointer operand of
 //   each memory access instruction (load/store/cmpxchg/atomicrmw)
 //   by casting it from non-zero address space to zero address space, e.g:
@@ -490,7 +490,7 @@ static void aspaceWrapOperand(DenseMap<Value *, Value *> &Cache, Instruction *I,
 //   (load (ptr addrspace (N) %p) ...)
 //     -> (load (addrspacecast ptr addrspace (N) %p to ptr))
 //
-// - assign section with name .arena.N for globals defined in
+// - assign section with name .addr_space.N for globals defined in
 //   non-zero address space N
 bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) {
   bool Changed = false;
@@ -517,13 +517,13 @@ bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) {
     Changed |= !CastsCache.empty();
   }
   // Merge all globals within same address space into single
-  // .arena.<addr space no> section
+  // .addr_space.<addr space no> section
   for (GlobalVariable &G : M.globals()) {
     if (G.getAddressSpace() == 0 || G.hasSection())
       continue;
     SmallString<16> SecName;
     raw_svector_ostream OS(SecName);
-    OS << ".arena." << G.getAddressSpace();
+    OS << ".addr_space." << G.getAddressSpace();
     G.setSection(SecName);
     // Prevent having separate section for constants
     G.setConstant(false);
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index 6ed83d8..feffdbc 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -73,6 +73,7 @@ def BPF_JLT  : BPFJumpOp<0xa>;
 def BPF_JLE  : BPFJumpOp<0xb>;
 def BPF_JSLT : BPFJumpOp<0xc>;
 def BPF_JSLE : BPFJumpOp<0xd>;
+def BPF_JCOND : BPFJumpOp<0xe>;
 
 class BPFWidthModifer<bits<2> val> {
   bits<2> Value = val;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 7198e94..66c5795 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -215,6 +215,18 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
   let BPFClass = BPF_JMP;
 }
 
+class JMP_JCOND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+    : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+                   (outs),
+                   (ins brtarget:$BrDst),
+                   !strconcat(OpcodeStr, " $BrDst"),
+                   Pattern> {
+  bits<16> BrDst;
+
+  let Inst{47-32} = BrDst;
+  let BPFClass = BPF_JMP;
+}
+
 class JMP_RR_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
     : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
                    (outs),
@@ -267,6 +279,7 @@ defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU, BPF_CC_LEU_32>;
 defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
 defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
+def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
 }
 
 // ALU instructions
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 7dad408..4493238 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -81,7 +81,10 @@ public:
     // The target is the 3rd operand of cond inst and the 1st of uncond inst.
     int32_t Imm;
     if (isConditionalBranch(Inst)) {
-      Imm = (short)Inst.getOperand(2).getImm();
+      if (Inst.getOpcode() == BPF::JCOND)
+        Imm = (short)Inst.getOperand(0).getImm();
+      else
+        Imm = (short)Inst.getOperand(2).getImm();
     } else if (isUnconditionalBranch(Inst)) {
       if (Inst.getOpcode() == BPF::JMP)
         Imm = (short)Inst.getOperand(0).getImm();
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index bf93280..4c70b3f 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_target(DirectXCodeGen
   DirectXSubtarget.cpp
   DirectXTargetMachine.cpp
   DXContainerGlobals.cpp
+  DXILIntrinsicExpansion.cpp
   DXILMetadata.cpp
   DXILOpBuilder.cpp
   DXILOpLowering.cpp
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 66b0ef2..b0e587d 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -266,6 +266,10 @@ def Frac : DXILOpMapping<22, unary, int_dx_frac,
                          "Returns a fraction from 0 to 1 that represents the "
                          "decimal part of the input.",
                          [llvm_halforfloat_ty, LLVMMatchType<0>]>;
+def RSqrt : DXILOpMapping<25, unary, int_dx_rsqrt,
+                         "Returns the reciprocal of the square root of the specified value."
+                         "rsqrt(x) = 1 / sqrt(x).",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Round : DXILOpMapping<26, unary, int_round,
                          "Returns the input rounded to the nearest integer"
                          "within a floating-point type.",
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
new file mode 100644
index 0000000..0461f04
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -0,0 +1,186 @@
+//===- DXILIntrinsicExpansion.cpp - Prepare LLVM Module for DXIL encoding--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains DXIL intrinsic expansions for those that don't have
+//  opcodes in DirectX Intermediate Language (DXIL).
+//===----------------------------------------------------------------------===//
+
+#include "DXILIntrinsicExpansion.h"
+#include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+
+#define DEBUG_TYPE "dxil-intrinsic-expansion"
+
+using namespace llvm;
+
+static bool isIntrinsicExpansion(Function &F) {
+  switch (F.getIntrinsicID()) {
+  case Intrinsic::exp:
+  case Intrinsic::dx_any:
+  case Intrinsic::dx_lerp:
+  case Intrinsic::dx_rcp:
+    return true;
+  }
+  return false;
+}
+
+static bool expandExpIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+  Type *Ty = X->getType();
+  Type *EltTy = Ty->getScalarType();
+  Constant *Log2eConst =
+      Ty->isVectorTy() ? ConstantVector::getSplat(
+                             ElementCount::getFixed(
+                                 cast<FixedVectorType>(Ty)->getNumElements()),
+                             ConstantFP::get(EltTy, numbers::log2e))
+                       : ConstantFP::get(EltTy, numbers::log2e);
+  Value *NewX = Builder.CreateFMul(Log2eConst, X);
+  auto *Exp2Call =
+      Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
+  Exp2Call->setTailCall(Orig->isTailCall());
+  Exp2Call->setAttributes(Orig->getAttributes());
+  Orig->replaceAllUsesWith(Exp2Call);
+  Orig->eraseFromParent();
+  return true;
+}
+
+static bool expandAnyIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+  Type *Ty = X->getType();
+  Type *EltTy = Ty->getScalarType();
+
+  if (!Ty->isVectorTy()) {
+    Value *Cond = EltTy->isFloatingPointTy()
+                      ? Builder.CreateFCmpUNE(X, ConstantFP::get(EltTy, 0))
+                      : Builder.CreateICmpNE(X, ConstantInt::get(EltTy, 0));
+    Orig->replaceAllUsesWith(Cond);
+  } else {
+    auto *XVec = dyn_cast<FixedVectorType>(Ty);
+    Value *Cond =
+        EltTy->isFloatingPointTy()
+            ? Builder.CreateFCmpUNE(
+                  X, ConstantVector::getSplat(
+                         ElementCount::getFixed(XVec->getNumElements()),
+                         ConstantFP::get(EltTy, 0)))
+            : Builder.CreateICmpNE(
+                  X, ConstantVector::getSplat(
+                         ElementCount::getFixed(XVec->getNumElements()),
+                         ConstantInt::get(EltTy, 0)));
+    Value *Result = Builder.CreateExtractElement(Cond, (uint64_t)0);
+    for (unsigned I = 1; I < XVec->getNumElements(); I++) {
+      Value *Elt = Builder.CreateExtractElement(Cond, I);
+      Result = Builder.CreateOr(Result, Elt);
+    }
+    Orig->replaceAllUsesWith(Result);
+  }
+  Orig->eraseFromParent();
+  return true;
+}
+
+static bool expandLerpIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  Value *Y = Orig->getOperand(1);
+  Value *S = Orig->getOperand(2);
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+  auto *V = Builder.CreateFSub(Y, X);
+  V = Builder.CreateFMul(S, V);
+  auto *Result = Builder.CreateFAdd(X, V, "dx.lerp");
+  Orig->replaceAllUsesWith(Result);
+  Orig->eraseFromParent();
+  return true;
+}
+
+static bool expandRcpIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+  Type *Ty = X->getType();
+  Type *EltTy = Ty->getScalarType();
+  Constant *One =
+      Ty->isVectorTy()
+          ? ConstantVector::getSplat(
+                ElementCount::getFixed(
+                    dyn_cast<FixedVectorType>(Ty)->getNumElements()),
+                ConstantFP::get(EltTy, 1.0))
+          : ConstantFP::get(EltTy, 1.0);
+  auto *Result = Builder.CreateFDiv(One, X, "dx.rcp");
+  Orig->replaceAllUsesWith(Result);
+  Orig->eraseFromParent();
+  return true;
+}
+
+static bool expandIntrinsic(Function &F, CallInst *Orig) {
+  switch (F.getIntrinsicID()) {
+  case Intrinsic::exp:
+    return expandExpIntrinsic(Orig);
+  case Intrinsic::dx_any:
+    return expandAnyIntrinsic(Orig);
+  case Intrinsic::dx_lerp:
+    return expandLerpIntrinsic(Orig);
+  case Intrinsic::dx_rcp:
+    return expandRcpIntrinsic(Orig);
+  }
+  return false;
+}
+
+static bool expansionIntrinsics(Module &M) {
+  for (auto &F : make_early_inc_range(M.functions())) {
+    if (!isIntrinsicExpansion(F))
+      continue;
+    bool IntrinsicExpanded = false;
+    for (User *U : make_early_inc_range(F.users())) {
+      auto *IntrinsicCall = dyn_cast<CallInst>(U);
+      if (!IntrinsicCall)
+        continue;
+      IntrinsicExpanded = expandIntrinsic(F, IntrinsicCall);
+    }
+    if (F.user_empty() && IntrinsicExpanded)
+      F.eraseFromParent();
+  }
+  return true;
+}
+
+PreservedAnalyses DXILIntrinsicExpansion::run(Module &M,
+                                              ModuleAnalysisManager &) {
+  if (expansionIntrinsics(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+bool DXILIntrinsicExpansionLegacy::runOnModule(Module &M) {
+  return expansionIntrinsics(M);
+}
+
+char DXILIntrinsicExpansionLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,
+                      "DXIL Intrinsic Expansion", false, false)
+INITIALIZE_PASS_END(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,
+                    "DXIL Intrinsic Expansion", false, false)
+
+ModulePass *llvm::createDXILIntrinsicExpansionLegacyPass() {
+  return new DXILIntrinsicExpansionLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
new file mode 100644
index 0000000..c86681a
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
@@ -0,0 +1,33 @@
+//===- DXILIntrinsicExpansion.h - Prepare LLVM Module for DXIL encoding----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
+#define LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
+
+#include "DXILResource.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes
+class DXILIntrinsicExpansion : public PassInfoMixin<DXILIntrinsicExpansion> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+};
+
+class DXILIntrinsicExpansionLegacy : public ModulePass {
+
+public:
+  bool runOnModule(Module &M) override;
+  DXILIntrinsicExpansionLegacy() : ModulePass(ID) {}
+
+  static char ID; // Pass identification.
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 6b649b7..e5c2042 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DXILConstants.h"
+#include "DXILIntrinsicExpansion.h"
 #include "DXILOpBuilder.h"
 #include "DirectX.h"
 #include "llvm/ADT/SmallVector.h"
@@ -94,9 +95,12 @@ public:
   DXILOpLoweringLegacy() : ModulePass(ID) {}
 
   static char ID; // Pass identification.
+  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+    // Specify the passes that your pass depends on
+    AU.addRequired<DXILIntrinsicExpansionLegacy>();
+  }
 };
 char DXILOpLoweringLegacy::ID = 0;
-
 } // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index eaecc3a..11b5412 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -28,6 +28,12 @@ void initializeDXILPrepareModulePass(PassRegistry &);
 /// Pass to convert modules into DXIL-compatable modules
 ModulePass *createDXILPrepareModulePass();
 
+/// Initializer for DXIL Intrinsic Expansion
+void initializeDXILIntrinsicExpansionLegacyPass(PassRegistry &);
+
+/// Pass to expand intrinsic operations that lack DXIL opCodes
+ModulePass *createDXILIntrinsicExpansionLegacyPass();
+
 /// Initializer for DXILOpLowering
 void initializeDXILOpLoweringLegacyPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 06938f8..03c825b 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -39,6 +39,7 @@ using namespace llvm;
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
   auto *PR = PassRegistry::getPassRegistry();
+  initializeDXILIntrinsicExpansionLegacyPass(*PR);
   initializeDXILPrepareModulePass(*PR);
   initializeEmbedDXILPassPass(*PR);
   initializeWriteDXILPassPass(*PR);
@@ -76,6 +77,7 @@ public:
 
   FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
   void addCodeGenPrepare() override {
+    addPass(createDXILIntrinsicExpansionLegacyPass());
     addPass(createDXILOpLoweringLegacyPass());
     addPass(createDXILPrepareModulePass());
     addPass(createDXILTranslateMetadataPass());
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index cf4b66f..458b871 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -138,20 +138,6 @@ ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
   return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth);
 }
 
-InstructionCost HexagonTTIImpl::getScalarizationOverhead(
-    VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
-    TTI::TargetCostKind CostKind) {
-  return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
-                                         CostKind);
-}
-
-InstructionCost
-HexagonTTIImpl::getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
-                                                 ArrayRef<Type *> Tys,
-                                                 TTI::TargetCostKind CostKind) {
-  return BaseT::getOperandsScalarizationOverhead(Args, Tys, CostKind);
-}
-
 InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
                                                  ArrayRef<Type *> Tys,
                                                  TTI::TargetCostKind CostKind) {
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index ec0fd45..fdb34f3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -103,14 +103,6 @@ public:
     return true;
   }
 
-  InstructionCost getScalarizationOverhead(VectorType *Ty,
-                                           const APInt &DemandedElts,
-                                           bool Insert, bool Extract,
-                                           TTI::TargetCostKind CostKind);
-  InstructionCost
-  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
-                                   ArrayRef<Type *> Tys,
-                                   TTI::TargetCostKind CostKind);
   InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                    ArrayRef<Type *> Tys,
                                    TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2219d9f..a6df4b3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1019,7 +1019,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   const DataLayout &DL = getDataLayout();
 
   // GlobalVariables are always constant pointers themselves.
-  PointerType *PTy = GVar->getType();
   Type *ETy = GVar->getValueType();
 
   if (GVar->hasExternalLinkage()) {
@@ -1027,6 +1026,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       O << ".visible ";
     else
       O << ".extern ";
+  } else if (GVar->hasCommonLinkage() &&
+             GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) {
+    O << ".common ";
   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
              GVar->hasAvailableExternallyLinkage() ||
              GVar->hasCommonLinkage()) {
@@ -1138,7 +1140,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   }
 
   O << ".";
-  emitPTXAddressSpace(PTy->getAddressSpace(), O);
+  emitPTXAddressSpace(GVar->getAddressSpace(), O);
 
   if (isManaged(*GVar)) {
     if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
@@ -1167,8 +1169,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
     // Ptx allows variable initilization only for constant and global state
     // spaces.
     if (GVar->hasInitializer()) {
-      if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
-          (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
+      if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
+          (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
         const Constant *Initializer = GVar->getInitializer();
         // 'undef' is treated as there is no value specified.
         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
@@ -1183,7 +1185,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
             !isa<UndefValue>(GVar->getInitializer())) {
           report_fatal_error("initial value of '" + GVar->getName() +
                              "' is not allowed in addrspace(" +
-                             Twine(PTy->getAddressSpace()) + ")");
+                             Twine(GVar->getAddressSpace()) + ")");
         }
       }
     }
@@ -1202,8 +1204,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       ElementSize = DL.getTypeStoreSize(ETy);
       // Ptx allows variable initilization only for constant and
       // global state spaces.
-      if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
-           (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
+      if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
+           (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
           GVar->hasInitializer()) {
         const Constant *Initializer = GVar->getInitializer();
         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 86fb367..c34472c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -60,10 +60,12 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
                      NRI->getFrameRegister(MF))
                  .addReg(NRI->getFrameLocalRegister(MF));
     }
-    BuildMI(MBB, MBBI, dl,
-            MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
-            NRI->getFrameLocalRegister(MF))
-        .addImm(MF.getFunctionNumber());
+    if (!MR.use_empty(NRI->getFrameLocalRegister(MF))) {
+      BuildMI(MBB, MBBI, dl,
+              MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
+              NRI->getFrameLocalRegister(MF))
+          .addImm(MF.getFunctionNumber());
+    }
   }
 }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c979c03..7ad3d99 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -645,8 +645,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
   setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
 
-  // Lowering of DYNAMIC_STACKALLOC is unsupported.
-  // Custom lower to produce an error.
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
 
@@ -937,6 +935,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(NVPTXISD::BFE)
     MAKE_CASE(NVPTXISD::BFI)
     MAKE_CASE(NVPTXISD::PRMT)
+    MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
     MAKE_CASE(NVPTXISD::SETP_F16X2)
     MAKE_CASE(NVPTXISD::SETP_BF16X2)
     MAKE_CASE(NVPTXISD::Dummy)
@@ -2211,14 +2210,39 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
 SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                      SelectionDAG &DAG) const {
-  const Function &Fn = DAG.getMachineFunction().getFunction();
-
-  DiagnosticInfoUnsupported NoDynamicAlloca(
-      Fn, "dynamic alloca unsupported by NVPTX backend",
-      SDLoc(Op).getDebugLoc());
-  DAG.getContext()->diagnose(NoDynamicAlloca);
-  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
-  return DAG.getMergeValues(Ops, SDLoc());
+
+  if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
+    const Function &Fn = DAG.getMachineFunction().getFunction();
+
+    DiagnosticInfoUnsupported NoDynamicAlloca(
+        Fn,
+        "Support for dynamic alloca introduced in PTX ISA version 7.3 and "
+        "requires target sm_52.",
+        SDLoc(Op).getDebugLoc());
+    DAG.getContext()->diagnose(NoDynamicAlloca);
+    auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()),
+                Op.getOperand(0)};
+    return DAG.getMergeValues(Ops, SDLoc());
+  }
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+  SDLoc DL(Op.getNode());
+
+  // The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
+  if (nvTM->is64Bit())
+    Size = DAG.getZExtOrTrunc(Size, DL, MVT::i64);
+  else
+    Size = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
+
+  SDValue AllocOps[] = {Chain, Size,
+                        DAG.getTargetConstant(Align, DL, MVT::i32)};
+  SDValue Alloca = DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL,
+                               nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps);
+
+  SDValue MergeOps[] = {Alloca, Chain};
+  return DAG.getMergeValues(MergeOps, DL);
 }
 
 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
@@ -6100,6 +6124,9 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   if (AI->isFloatingPointOperation()) {
     if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
+      if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
+          STI.getPTXVersion() >= 63)
+        return AtomicExpansionKind::None;
       if (Ty->isFloatTy())
         return AtomicExpansionKind::None;
       if (Ty->isDoubleTy() && STI.hasAtomAddF64())
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cf1d458..c9db10e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -61,6 +61,7 @@ enum NodeType : unsigned {
   BFE,
   BFI,
   PRMT,
+  DYNAMIC_STACKALLOC,
   Dummy,
 
   LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 3dc5b45..3d387ed 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3805,6 +3805,28 @@ def CALL_PROTOTYPE :
   NVPTXInst<(outs), (ins ProtoIdent:$ident),
             "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
 
+def SDTDynAllocaOp :
+  SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisInt<2>]>;
+
+def dyn_alloca :
+  SDNode<"NVPTXISD::DYNAMIC_STACKALLOC", SDTDynAllocaOp,
+         [SDNPHasChain, SDNPSideEffect]>;
+
+def DYNAMIC_STACKALLOC32 :
+  NVPTXInst<(outs Int32Regs:$ptr),
+            (ins Int32Regs:$size, i32imm:$align),
+            "alloca.u32 \t$ptr, $size, $align;\n\t"
+            "cvta.local.u32 \t$ptr, $ptr;",
+            [(set (i32 Int32Regs:$ptr), (dyn_alloca Int32Regs:$size, (i32 timm:$align)))]>,
+            Requires<[hasPTX<73>, hasSM<52>]>;
+
+def DYNAMIC_STACKALLOC64 :
+  NVPTXInst<(outs Int64Regs:$ptr),
+            (ins Int64Regs:$size, i32imm:$align),
+            "alloca.u64 \t$ptr, $size, $align;\n\t"
+            "cvta.local.u64 \t$ptr, $ptr;",
+            [(set Int64Regs:$ptr, (dyn_alloca Int64Regs:$size, (i32 timm:$align)))]>,
+            Requires<[hasPTX<73>, hasSM<52>]>;
 
 include "NVPTXIntrinsics.td"
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 477789a..c0c5338 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1520,7 +1520,7 @@ multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
-  Requires<Pred>;
+  Requires<!if(!eq(TypeStr, ".f16"), [Predicate<"false">], Pred)>;
 }
 multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
@@ -1630,6 +1630,13 @@ defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
   ".add", atomic_load_add_64_gen, i64imm, imm>;
 
+defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
+  atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
+  atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
+  atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
+
 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
   atomic_load_add_g, f32imm, fpimm>;
 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
@@ -2008,6 +2015,9 @@ multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                        list<Predicate> Preds> {
   let AddedComplexity = 1 in {
     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
+                      (ins Int16Regs:$src, regclass:$b),
+                      (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
+    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                       (ins Int32Regs:$src, regclass:$b),
                       (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
@@ -2018,6 +2028,9 @@ multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
   // from Instruction) so we have to enforce specific type on
   // immediates via explicit cast to ImmTy.
   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
+                    (ins Int16Regs:$src, ImmType:$b),
+                    (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
+  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                     (ins Int32Regs:$src, ImmType:$b),
                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
@@ -2136,6 +2149,8 @@ multiclass ATOM2_add_impl<string OpStr> {
    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
+   defm _f16  : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
+                            [hasSM<70>, hasPTX<63>]>;
    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
                             []>;
    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 542854e..64cae1c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -474,6 +474,35 @@ static void collectTOCStats(PPCAsmPrinter::TOCEntryType Type) {
   }
 }
 
+static CodeModel::Model getCodeModel(const PPCSubtarget &S,
+                                     const TargetMachine &TM,
+                                     const MachineOperand &MO) {
+  CodeModel::Model ModuleModel = TM.getCodeModel();
+
+  // If the operand is not a global address then there is no
+  // global variable to carry an attribute.
+  if (!(MO.getType() == MachineOperand::MO_GlobalAddress))
+    return ModuleModel;
+
+  const GlobalValue *GV = MO.getGlobal();
+  assert(GV && "expected global for MO_GlobalAddress");
+
+  return S.getCodeModel(TM, GV);
+}
+
+static void setOptionalCodeModel(MCSymbolXCOFF *XSym, CodeModel::Model CM) {
+  switch (CM) {
+  case CodeModel::Large:
+    XSym->setPerSymbolCodeModel(MCSymbolXCOFF::CM_Large);
+    return;
+  case CodeModel::Small:
+    XSym->setPerSymbolCodeModel(MCSymbolXCOFF::CM_Small);
+    return;
+  default:
+    report_fatal_error("Invalid code model for AIX");
+  }
+}
+
 /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
 /// exists for it.  If not, create one.  Then return a symbol that references
 /// the TOC entry.
@@ -1014,7 +1043,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     // relative to the toc-base.
     if (IsAIX) {
       assert(
-          TM.getCodeModel() == CodeModel::Small &&
+          getCodeModel(*Subtarget, TM, MO) == CodeModel::Small &&
           "This pseudo should only be selected for 32-bit small code model.");
       Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp, VK);
       TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
@@ -1098,7 +1127,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     return;
   }
   case PPC::ADDIStocHA: {
-    assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) &&
+    const MachineOperand &MO = MI->getOperand(2);
+
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand for ADDIStocHA.");
+    assert((IsAIX && !IsPPC64 &&
+            getCodeModel(*Subtarget, TM, MO) == CodeModel::Large) &&
            "This pseudo should only be selected for 32-bit large code model on"
            " AIX.");
 
@@ -1108,10 +1142,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     // Change the opcode to ADDIS.
     TmpInst.setOpcode(PPC::ADDIS);
 
-    const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
-           "Invalid operand for ADDIStocHA.");
-
     // Map the machine operand to its corresponding MCSymbol.
     MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
 
@@ -1131,7 +1161,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     return;
   }
   case PPC::LWZtocL: {
-    assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large &&
+    const MachineOperand &MO = MI->getOperand(1);
+
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand for LWZtocL.");
+    assert(IsAIX && !IsPPC64 &&
+           getCodeModel(*Subtarget, TM, MO) == CodeModel::Large &&
            "This pseudo should only be selected for 32-bit large code model on"
            " AIX.");
 
@@ -1141,10 +1176,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     // Change the opcode to lwz.
     TmpInst.setOpcode(PPC::LWZ);
 
-    const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
-           "Invalid operand for LWZtocL.");
-
     // Map the machine operand to its corresponding MCSymbol.
     MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
 
@@ -1183,8 +1214,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
 
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
+
+    const CodeModel::Model CM =
+        IsAIX ? getCodeModel(*Subtarget, TM, MO) : TM.getCodeModel();
+
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
+        (MO.isCPI() && CM == CodeModel::Large))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1225,8 +1260,9 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
 
     MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO);
-
-    if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
+    CodeModel::Model CM =
+        IsAIX ? getCodeModel(*Subtarget, TM, MO) : TM.getCodeModel();
+    if (!MO.isCPI() || CM == CodeModel::Large)
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO;
@@ -2651,6 +2687,27 @@ uint64_t PPCAIXAsmPrinter::getAliasOffset(const Constant *C) {
   return 0;
 }
 
+static void tocDataChecks(unsigned PointerSize, const GlobalVariable *GV) {
+  // TODO: These asserts should be updated as more support for the toc data
+  // transformation is added (struct support, etc.).
+  assert(
+      PointerSize >= GV->getAlign().valueOrOne().value() &&
+      "GlobalVariables with an alignment requirement stricter than TOC entry "
+      "size not supported by the toc data transformation.");
+
+  Type *GVType = GV->getValueType();
+  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
+                              "supported by the toc data transformation.");
+  if (GV->getParent()->getDataLayout().getTypeSizeInBits(GVType) >
+      PointerSize * 8)
+    report_fatal_error(
+        "A GlobalVariable with size larger than a TOC entry is not currently "
+        "supported by the toc data transformation.");
+  if (GV->hasPrivateLinkage())
+    report_fatal_error("A GlobalVariable with private linkage is not "
+                       "currently supported by the toc data transformation.");
+}
+
 void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
   // Special LLVM global arrays have been handled at the initialization.
   if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV))
@@ -2660,7 +2717,7 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
   // when we emit the .toc section.
   if (GV->hasAttribute("toc-data")) {
     unsigned PointerSize = GV->getParent()->getDataLayout().getPointerSize();
-    Subtarget->tocDataChecks(PointerSize, GV);
+    tocDataChecks(PointerSize, GV);
     TOCDataGlobalVars.push_back(GV);
     return;
   }
@@ -2963,6 +3020,10 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
     }
 
     setCsectAlignment(&G);
+    std::optional<CodeModel::Model> OptionalCodeModel = G.getCodeModel();
+    if (OptionalCodeModel)
+      setOptionalCodeModel(cast<MCSymbolXCOFF>(getSymbol(&G)),
+                           *OptionalCodeModel);
   }
 
   for (const auto &F : M)
@@ -2984,6 +3045,15 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
                          false);
     }
 
+    const GlobalVariable *GVar =
+        dyn_cast_or_null<GlobalVariable>(Alias.getAliaseeObject());
+    if (GVar) {
+      std::optional<CodeModel::Model> OptionalCodeModel = GVar->getCodeModel();
+      if (OptionalCodeModel)
+        setOptionalCodeModel(cast<MCSymbolXCOFF>(getSymbol(&Alias)),
+                             *OptionalCodeModel);
+    }
+
     GOAliasMap[Aliasee].push_back(&Alias);
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0c25accd..dfea9e7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -524,6 +524,24 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
   return true;
 }
 
+static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget,
+                                     const TargetMachine &TM,
+                                     const SDNode *Node) {
+  // If there isn't an attribute to override the module code model
+  // this will be the effective code model.
+  CodeModel::Model ModuleModel = TM.getCodeModel();
+
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Node->getOperand(0));
+  if (!GA)
+    return ModuleModel;
+
+  const GlobalValue *GV = GA->getGlobal();
+  if (!GV)
+    return ModuleModel;
+
+  return Subtarget.getCodeModel(TM, GV);
+}
+
 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 /// operand. If so Imm will receive the 32-bit value.
 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
@@ -6059,7 +6077,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     const bool isAIXABI = Subtarget->isAIXABI();
 
     // PowerPC only support small, medium and large code model.
-    const CodeModel::Model CModel = TM.getCodeModel();
+    const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
+
     assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
            "PowerPC doesn't support tiny or kernel code models.");
 
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 884f2f5..653d9bd 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -185,37 +185,64 @@ bool PPCSubtarget::enableSubRegLiveness() const {
   return UseSubRegLiveness;
 }
 
-void PPCSubtarget::tocDataChecks(unsigned PointerSize,
-                                 const GlobalVariable *GV) const {
-  // TODO: These asserts should be updated as more support for the toc data
-  // transformation is added (struct support, etc.).
-  assert(
-      PointerSize >= GV->getAlign().valueOrOne().value() &&
-      "GlobalVariables with an alignment requirement stricter than TOC entry "
-      "size not supported by the toc data transformation.");
-
-  Type *GVType = GV->getValueType();
-  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
-                              "supported by the toc data transformation.");
-  if (GV->getParent()->getDataLayout().getTypeSizeInBits(GVType) >
-      PointerSize * 8)
-    report_fatal_error(
-        "A GlobalVariable with size larger than a TOC entry is not currently "
-        "supported by the toc data transformation.");
-  if (GV->hasPrivateLinkage())
-    report_fatal_error("A GlobalVariable with private linkage is not "
-                       "currently supported by the toc data transformation.");
-}
-
 bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
+  if (isAIXABI()) {
+    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+      // On AIX the only symbols that aren't indirect are toc-data.
+      return !GVar->hasAttribute("toc-data");
+
+    return true;
+  }
+
   // Large code model always uses the TOC even for local symbols.
   if (TM.getCodeModel() == CodeModel::Large)
     return true;
+
   if (TM.shouldAssumeDSOLocal(GV))
     return false;
   return true;
 }
 
+CodeModel::Model PPCSubtarget::getCodeModel(const TargetMachine &TM,
+                                            const GlobalValue *GV) const {
+  // If there isn't an attribute to override the module code model
+  // this will be the effective code model.
+  CodeModel::Model ModuleModel = TM.getCodeModel();
+
+  // Initially support per global code model for AIX only.
+  if (!isAIXABI())
+    return ModuleModel;
+
+  // Only GlobalVariables carry an attribute which can override the module code
+  // model.
+  assert(GV && "Unexpected NULL GlobalValue");
+  const GlobalVariable *GlobalVar =
+      [](const GlobalValue *GV) -> const GlobalVariable * {
+    const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV);
+    if (Var)
+      return Var;
+
+    const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
+    if (Alias)
+      return dyn_cast<GlobalVariable>(Alias->getAliaseeObject());
+
+    return nullptr;
+  }(GV);
+
+  if (!GlobalVar)
+    return ModuleModel;
+
+  std::optional<CodeModel::Model> MaybeCodeModel = GlobalVar->getCodeModel();
+  if (MaybeCodeModel) {
+    CodeModel::Model CM = *MaybeCodeModel;
+    assert((CM == CodeModel::Small || CM == CodeModel::Large) &&
+           "invalid code model for AIX");
+    return CM;
+  }
+
+  return ModuleModel;
+}
+
 bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
 bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
 
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index d913f22..bf35f8e 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -245,7 +245,9 @@ public:
   /// True if the GV will be accessed via an indirect symbol.
   bool isGVIndirectSymbol(const GlobalValue *GV) const;
 
-  void tocDataChecks(unsigned PointerSize, const GlobalVariable *GV) const;
+  /// Calculates the effective code model for argument GV.
+  CodeModel::Model getCodeModel(const TargetMachine &TM,
+                                const GlobalValue *GV) const;
 
   /// True if the ABI is descriptor based.
   bool usesFunctionDescriptors() const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 803774f..105587f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -717,7 +717,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
     static const unsigned FloatingPointVecReduceOps[] = {
         ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
-        ISD::VECREDUCE_FMAX};
+        ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
 
     if (!Subtarget.is64Bit()) {
       // We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -6541,6 +6541,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VECREDUCE_SEQ_FADD:
   case ISD::VECREDUCE_FMIN:
   case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMAXIMUM:
+  case ISD::VECREDUCE_FMINIMUM:
     return lowerFPVECREDUCE(Op, DAG);
   case ISD::VP_REDUCE_ADD:
   case ISD::VP_REDUCE_UMAX:
@@ -9541,14 +9543,17 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
   case ISD::VECREDUCE_SEQ_FADD:
     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                            Op.getOperand(0));
+  case ISD::VECREDUCE_FMINIMUM:
+  case ISD::VECREDUCE_FMAXIMUM:
   case ISD::VECREDUCE_FMIN:
   case ISD::VECREDUCE_FMAX: {
     SDValue Front =
         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
                     DAG.getVectorIdxConstant(0, DL));
-    unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
-                          ? RISCVISD::VECREDUCE_FMIN_VL
-                          : RISCVISD::VECREDUCE_FMAX_VL;
+    unsigned RVVOpc =
+        (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
+            ? RISCVISD::VECREDUCE_FMIN_VL
+            : RISCVISD::VECREDUCE_FMAX_VL;
     return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
   }
   }
@@ -9571,9 +9576,30 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
   }
 
+  MVT ResVT = Op.getSimpleValueType();
   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
-  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
-                           VectorVal, Mask, VL, DL, DAG, Subtarget);
+  SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
+                                  VL, DL, DAG, Subtarget);
+  if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
+      Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
+    return Res;
+
+  if (Op->getFlags().hasNoNaNs())
+    return Res;
+
+  // Force output to NaN if any element is Nan.
+  SDValue IsNan =
+      DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
+                  {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
+                   DAG.getUNDEF(Mask.getValueType()), Mask, VL});
+  MVT XLenVT = Subtarget.getXLenVT();
+  SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
+  SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
+                                DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
+  return DAG.getSelect(
+      DL, ResVT, NoNaNs, Res,
+      DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
+                        ResVT));
 }
 
 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index e753c1f..966cdc4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1405,7 +1405,8 @@ multiclass BccPat<CondCode Cond, RVInstB Inst> {
 
 class BrccCompressOpt<CondCode Cond, RVInstB Inst>
     : Pat<(riscv_brcc GPR:$lhs, simm12_no6:$Constant, Cond, bb:$place),
-          (Inst (ADDI GPR:$lhs, (NegImm simm12:$Constant)), (XLenVT X0), bb:$place)>;
+          (Inst (XLenVT (ADDI GPR:$lhs, (NegImm simm12:$Constant))),
+                (XLenVT X0), bb:$place)>;
 
 defm : BccPat<SETEQ, BEQ>;
 defm : BccPat<SETNE, BNE>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index f0f8494..a882b20 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -424,13 +424,13 @@ def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", Commutable=1>,
 
 let Predicates = [HasStdExtZbb] in {
 def MIN  : ALU_rr<0b0000101, 0b100, "min", Commutable=1>,
-           Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+           Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
 def MINU : ALU_rr<0b0000101, 0b101, "minu", Commutable=1>,
-           Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+           Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
 def MAX  : ALU_rr<0b0000101, 0b110, "max", Commutable=1>,
-           Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+           Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
 def MAXU : ALU_rr<0b0000101, 0b111, "maxu", Commutable=1>,
-           Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+           Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
 } // Predicates = [HasStdExtZbb]
 
 let Predicates = [HasStdExtZbkb] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 240d170..3586d23 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -186,7 +186,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
                                  WriteBEXT, WriteBEXTI,
                                  WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                                  WriteCPOP, WriteCPOP32,
-                                 WriteREV8, WriteORCB, WriteSFB,
+                                 WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
                                  WriteIMul, WriteIMul32,
                                  WriteIDiv, WriteIDiv32,
                                  WriteIRem, WriteIRem32,
@@ -305,6 +305,9 @@ def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;
 // orc.b is in the late-B ALU.
 def : WriteRes<WriteORCB, [SiFive7PipeB]>;
 
+// min/max are in the late-B ALU
+def : WriteRes<WriteIMinMax, [SiFive7PipeB]>;
+
 // rev8 is in the late-A and late-B ALUs.
 def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
 
@@ -1041,6 +1044,7 @@ def : SiFive7AnyToGPRBypass<ReadCTZ32>;
 def : ReadAdvance<ReadCPOP, 0>;
 def : ReadAdvance<ReadCPOP32, 0>;
 def : SiFive7AnyToGPRBypass<ReadORCB>;
+def : SiFive7AnyToGPRBypass<ReadIMinMax>;
 def : SiFive7AnyToGPRBypass<ReadREV8>;
 def : SiFive7AnyToGPRBypass<ReadSHXADD>;
 def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index d02d34a..8ec2e4f 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -109,6 +109,7 @@ def : WriteRes<WriteCTZ, [SiFiveP400IntArith]>;
 def : WriteRes<WriteCTZ32, [SiFiveP400IntArith]>;
 
 def : WriteRes<WriteORCB, [SiFiveP400IntArith]>;
+def : WriteRes<WriteIMinMax, [SiFiveP400IntArith]>;
 
 def : WriteRes<WriteREV8, [SiFiveP400IntArith]>;
 
@@ -349,6 +350,7 @@ def : ReadAdvance<ReadCTZ32, 0>;
 def : ReadAdvance<ReadCPOP, 0>;
 def : ReadAdvance<ReadCPOP32, 0>;
 def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
 def : ReadAdvance<ReadREV8, 0>;
 def : ReadAdvance<ReadSHXADD, 0>;
 def : ReadAdvance<ReadSHXADD32, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index ef491ed..4fc7b03 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -85,6 +85,7 @@ def : WriteRes<WriteRotateImm32, [XS2ALU]>;
 def : WriteRes<WriteRotateReg, [XS2ALU]>;
 def : WriteRes<WriteRotateReg32, [XS2ALU]>;
 def : WriteRes<WriteORCB, [XS2ALU]>;
+def : WriteRes<WriteIMinMax, [XS2ALU]>;
 def : WriteRes<WriteREV8, [XS2ALU]>;
 
 // Zbkb
@@ -288,6 +289,7 @@ def : ReadAdvance<ReadCTZ32, 0>;
 def : ReadAdvance<ReadCPOP, 0>;
 def : ReadAdvance<ReadCPOP32, 0>;
 def : XS2LoadToALUBypass<ReadORCB>;
+def : XS2LoadToALUBypass<ReadIMinMax>;
 def : XS2LoadToALUBypass<ReadREV8>;
 // Zbkc
 def : ReadAdvance<ReadCLMUL, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZb.td b/llvm/lib/Target/RISCV/RISCVScheduleZb.td
index 0a16390..93381f4 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleZb.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleZb.td
@@ -25,6 +25,7 @@ def WriteCPOP        : SchedWrite;
 def WriteCPOP32      : SchedWrite;
 def WriteREV8        : SchedWrite;
 def WriteORCB        : SchedWrite;
+def WriteIMinMax     : SchedWrite;
 
 // Zbc extension
 def WriteCLMUL       : SchedWrite; // CLMUL/CLMULR/CLMULH
@@ -63,6 +64,7 @@ def ReadCPOP        : SchedRead;
 def ReadCPOP32      : SchedRead;
 def ReadREV8        : SchedRead;
 def ReadORCB        : SchedRead;
+def ReadIMinMax     : SchedRead;
 
 // Zbc extension
 def ReadCLMUL       : SchedRead; // CLMUL/CLMULR/CLMULH
@@ -106,6 +108,7 @@ def : WriteRes<WriteCPOP, []>;
 def : WriteRes<WriteCPOP32, []>;
 def : WriteRes<WriteREV8, []>;
 def : WriteRes<WriteORCB, []>;
+def : WriteRes<WriteIMinMax, []>;
 
 def : ReadAdvance<ReadRotateImm, 0>;
 def : ReadAdvance<ReadRotateImm32, 0>;
@@ -119,6 +122,7 @@ def : ReadAdvance<ReadCPOP, 0>;
 def : ReadAdvance<ReadCPOP32, 0>;
 def : ReadAdvance<ReadREV8, 0>;
 def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
 }
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index f1fbe2b..6f23f05 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -209,7 +209,7 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx,
   // spv_assign_ptr_type intrinsic or otherwise use default pointer element
   // type.
   Argument *Arg = F.getArg(ArgIdx);
-  if (Arg->hasByValAttr() || Arg->hasByRefAttr()) {
+  if (HasPointeeTypeAttr(Arg)) {
     Type *ByValRefType = Arg->hasByValAttr() ? Arg->getParamByValType()
                                              : Arg->getParamByRefType();
     SPIRVType *ElementType = GR->getOrCreateSPIRVType(ByValRefType, MIRBuilder);
@@ -319,6 +319,12 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
         buildOpDecorate(VRegs[i][0], MIRBuilder,
                         SPIRV::Decoration::FuncParamAttr, {Attr});
       }
+      if (Arg.hasAttribute(Attribute::ByVal)) {
+        auto Attr =
+            static_cast<unsigned>(SPIRV::FunctionParameterAttribute::ByVal);
+        buildOpDecorate(VRegs[i][0], MIRBuilder,
+                        SPIRV::Decoration::FuncParamAttr, {Attr});
+      }
 
       if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
         std::vector<SPIRV::Decoration::Decoration> ArgTypeQualDecs =
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index c5b9012..458af92 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -91,6 +91,7 @@ class SPIRVEmitIntrinsics
                                         IRBuilder<> &B);
   void insertPtrCastOrAssignTypeInstr(Instruction *I, IRBuilder<> &B);
   void processGlobalValue(GlobalVariable &GV, IRBuilder<> &B);
+  void processParamTypes(Function *F, IRBuilder<> &B);
 
 public:
   static char ID;
@@ -794,6 +795,64 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I,
   }
 }
 
+void SPIRVEmitIntrinsics::processParamTypes(Function *F, IRBuilder<> &B) {
+  DenseMap<unsigned, Argument *> Args;
+  unsigned i = 0;
+  for (Argument &Arg : F->args()) {
+    if (isUntypedPointerTy(Arg.getType()) &&
+        DeducedElTys.find(&Arg) == DeducedElTys.end() &&
+        !HasPointeeTypeAttr(&Arg))
+      Args[i] = &Arg;
+    i++;
+  }
+  if (Args.size() == 0)
+    return;
+
+  // Args contains opaque pointers without element type definition
+  B.SetInsertPointPastAllocas(F);
+  std::unordered_set<Value *> Visited;
+  for (User *U : F->users()) {
+    CallInst *CI = dyn_cast<CallInst>(U);
+    if (!CI)
+      continue;
+    for (unsigned OpIdx = 0; OpIdx < CI->arg_size() && Args.size() > 0;
+         OpIdx++) {
+      auto It = Args.find(OpIdx);
+      Argument *Arg = It == Args.end() ? nullptr : It->second;
+      if (!Arg)
+        continue;
+      Value *OpArg = CI->getArgOperand(OpIdx);
+      if (!isPointerTy(OpArg->getType()))
+        continue;
+      // maybe we already know the operand's element type
+      auto DeducedIt = DeducedElTys.find(OpArg);
+      Type *ElemTy =
+          DeducedIt == DeducedElTys.end() ? nullptr : DeducedIt->second;
+      if (!ElemTy) {
+        for (User *OpU : OpArg->users()) {
+          if (Instruction *Inst = dyn_cast<Instruction>(OpU)) {
+            Visited.clear();
+            ElemTy = deduceElementTypeHelper(Inst, Visited, DeducedElTys);
+            if (ElemTy)
+              break;
+          }
+        }
+      }
+      if (ElemTy) {
+        unsigned AddressSpace = getPointerAddressSpace(Arg->getType());
+        CallInst *AssignPtrTyCI = buildIntrWithMD(
+            Intrinsic::spv_assign_ptr_type, {Arg->getType()},
+            Constant::getNullValue(ElemTy), Arg, {B.getInt32(AddressSpace)}, B);
+        DeducedElTys[AssignPtrTyCI] = ElemTy;
+        DeducedElTys[Arg] = ElemTy;
+        Args.erase(It);
+      }
+    }
+    if (Args.size() == 0)
+      break;
+  }
+}
+
 bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
   if (Func.isDeclaration())
     return false;
@@ -839,6 +898,11 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
       continue;
     processInstrAfterVisit(I, B);
   }
+
+  // check if function parameter types are set
+  if (!F->isIntrinsic())
+    processParamTypes(F, B);
+
   return true;
 }
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 6174807..e6e9131d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -104,11 +104,7 @@ static void validatePtrTypes(const SPIRVSubtarget &STI,
   SPIRV::StorageClass::StorageClass SC =
       static_cast<SPIRV::StorageClass::StorageClass>(
           OpType->getOperand(1).getImm());
-  MachineInstr *PrevI = I.getPrevNode();
-  MachineBasicBlock &MBB = *I.getParent();
-  MachineBasicBlock::iterator InsPt =
-      PrevI ? PrevI->getIterator() : MBB.begin();
-  MachineIRBuilder MIB(MBB, InsPt);
+  MachineIRBuilder MIB(I);
   SPIRVType *NewPtrType = GR.getOrCreateSPIRVPointerType(ResType, MIB, SC);
   if (!GR.isBitcastCompatible(NewPtrType, OpType))
     report_fatal_error(
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index d5ed501..eb87349 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -126,5 +126,10 @@ inline unsigned getPointerAddressSpace(const Type *T) {
              : cast<TypedPointerType>(SubT)->getAddressSpace();
 }
 
+// Return true if the Argument is decorated with a pointee type
+inline bool HasPointeeTypeAttr(Argument *Arg) {
+  return Arg->hasByValAttr() || Arg->hasByRefAttr();
+}
+
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index fb949d4..03897b5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -264,7 +264,7 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
     Params.push_back(AddrType);
   } else { // Function symbols
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
-    getLibcallSignature(Subtarget, Name, Returns, Params);
+    WebAssembly::getLibcallSignature(Subtarget, Name, Returns, Params);
   }
   auto Signature = std::make_unique<wasm::WasmSignature>(std::move(Returns),
                                                          std::move(Params));
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 3e2e029..1896ac6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -524,10 +524,10 @@ struct StaticLibcallNameMap {
 
 } // end anonymous namespace
 
-void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
-                               RTLIB::Libcall LC,
-                               SmallVectorImpl<wasm::ValType> &Rets,
-                               SmallVectorImpl<wasm::ValType> &Params) {
+void WebAssembly::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
+                                      RTLIB::Libcall LC,
+                                      SmallVectorImpl<wasm::ValType> &Rets,
+                                      SmallVectorImpl<wasm::ValType> &Params) {
   assert(Rets.empty());
   assert(Params.empty());
 
@@ -889,10 +889,10 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
 
 // TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
 // other than here, just roll its logic into this version.
-void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
-                               StringRef Name,
-                               SmallVectorImpl<wasm::ValType> &Rets,
-                               SmallVectorImpl<wasm::ValType> &Params) {
+void WebAssembly::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
+                                      StringRef Name,
+                                      SmallVectorImpl<wasm::ValType> &Rets,
+                                      SmallVectorImpl<wasm::ValType> &Params) {
   static StaticLibcallNameMap LibcallNameMap;
   auto &Map = LibcallNameMap.Map;
   auto Val = Map.find(Name);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
index f7a94aa..ff6515d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -22,16 +22,18 @@ namespace llvm {
 
 class WebAssemblySubtarget;
 
-extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
-                                RTLIB::Libcall LC,
-                                SmallVectorImpl<wasm::ValType> &Rets,
-                                SmallVectorImpl<wasm::ValType> &Params);
+namespace WebAssembly {
 
-extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
-                                StringRef Name,
-                                SmallVectorImpl<wasm::ValType> &Rets,
-                                SmallVectorImpl<wasm::ValType> &Params);
+void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
+                         RTLIB::Libcall LC,
+                         SmallVectorImpl<wasm::ValType> &Rets,
+                         SmallVectorImpl<wasm::ValType> &Params);
 
+void getLibcallSignature(const WebAssemblySubtarget &Subtarget, StringRef Name,
+                         SmallVectorImpl<wasm::ValType> &Rets,
+                         SmallVectorImpl<wasm::ValType> &Params);
+
+} // end namespace WebAssembly
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8367f93..78bc043 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -683,6 +683,12 @@ def TuningFastGather
     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                        "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
 
+// Generate vpdpwssd instead of vpmaddwd+vpaddd sequence.
+def TuningFastDPWSSD
+    : SubtargetFeature<
+          "fast-dpwssd", "HasFastDPWSSD", "true",
+          "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;
+
 def TuningPreferNoGather
     : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
                        "Prefer no gather instructions">;
@@ -1502,7 +1508,11 @@ def ProcessorFeatures {
     !listconcat(ZN2Tuning, ZN3AdditionalTuning);
   list<SubtargetFeature> ZN3Features =
     !listconcat(ZN2Features, ZN3AdditionalFeatures);
-  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
+
+
+  list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD];
+  list<SubtargetFeature> ZN4Tuning =
+    !listconcat(ZN3Tuning, ZN4AdditionalTuning);
   list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
                                                   FeatureEVEX512,
                                                   FeatureCDI,
diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index 6ef28b3..4537543 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -48,7 +48,7 @@ Error X86CodeGenPassBuilder::addInstSelector(AddMachinePass &) const {
 
 Error X86TargetMachine::buildCodeGenPipeline(
     ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
-    CodeGenFileType FileType, CGPassBuilderOption Opt,
+    CodeGenFileType FileType, const CGPassBuilderOption &Opt,
     PassInstrumentationCallbacks *PIC) {
   auto CGPB = X86CodeGenPassBuilder(*this, Opt, PIC);
   return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 76c6c16..4e4241e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3090,13 +3090,19 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
-  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
-  auto *GA = cast<GlobalAddressSDNode>(N);
+  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress ||
+         N.getOpcode() == ISD::TargetExternalSymbol);
 
   X86ISelAddressMode AM;
-  AM.GV = GA->getGlobal();
-  AM.Disp += GA->getOffset();
-  AM.SymbolFlags = GA->getTargetFlags();
+  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
+    AM.GV = GA->getGlobal();
+    AM.Disp += GA->getOffset();
+    AM.SymbolFlags = GA->getTargetFlags();
+  } else {
+    auto *SA = cast<ExternalSymbolSDNode>(N);
+    AM.ES = SA->getSymbol();
+    AM.SymbolFlags = SA->getTargetFlags();
+  }
 
   if (Subtarget->is32Bit()) {
     AM.Scale = 1;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2b5e3c0..9b88611 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18592,13 +18592,22 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDLoc dl(GA);
-  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
-                                           GA->getValueType(0),
-                                           GA->getOffset(),
-                                           OperandFlags);
+  SDValue TGA;
+  bool UseTLSDESC = DAG.getTarget().useTLSDESC();
+  if (LocalDynamic && UseTLSDESC) {
+    TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
+    auto UI = TGA->use_begin();
+    // Reuse existing GetTLSADDR node if we can find it.
+    if (UI != TGA->use_end())
+      return SDValue(*UI->use_begin()->use_begin(), 0);
+  } else {
+    TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+                                     GA->getOffset(), OperandFlags);
+  }
 
-  X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
-                                           : X86ISD::TLSADDR;
+  X86ISD::NodeType CallType = UseTLSDESC     ? X86ISD::TLSDESC
+                              : LocalDynamic ? X86ISD::TLSBASEADDR
+                                             : X86ISD::TLSADDR;
 
   if (InGlue) {
     SDValue Ops[] = { Chain,  TGA, *InGlue };
@@ -18613,7 +18622,19 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
   MFI.setHasCalls(true);
 
   SDValue Glue = Chain.getValue(1);
-  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+  SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+
+  if (!UseTLSDESC)
+    return Ret;
+
+  const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
+  unsigned Seg = Subtarget.is64Bit() ? X86AS::FS : X86AS::GS;
+
+  Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg));
+  SDValue Offset =
+      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
+                  MachinePointerInfo(Ptr));
+  return DAG.getNode(ISD::ADD, dl, PtrVT, Ret, Offset);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
@@ -33426,6 +33447,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(TLSADDR)
   NODE_NAME_CASE(TLSBASEADDR)
   NODE_NAME_CASE(TLSCALL)
+  NODE_NAME_CASE(TLSDESC)
   NODE_NAME_CASE(EH_SJLJ_SETJMP)
   NODE_NAME_CASE(EH_SJLJ_LONGJMP)
   NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
@@ -36206,6 +36228,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
   case X86::TLS_base_addrX32:
+  case X86::TLS_desc32:
+  case X86::TLS_desc64:
     return EmitLoweredTLSAddr(MI, BB);
   case X86::INDIRECT_THUNK_CALL32:
   case X86::INDIRECT_THUNK_CALL64:
@@ -42641,7 +42665,6 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
 bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     bool PoisonOnly, unsigned Depth) const {
-  unsigned EltsBits = Op.getScalarValueSizeInBits();
   unsigned NumElts = DemandedElts.getBitWidth();
 
   // TODO: Add more target shuffles.
@@ -42649,15 +42672,28 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
   case X86ISD::PSHUFD:
   case X86ISD::VPERMILPI: {
     SmallVector<int, 8> Mask;
-    DecodePSHUFMask(NumElts, EltsBits, Op.getConstantOperandVal(1), Mask);
-
-    APInt DemandedSrcElts = APInt::getZero(NumElts);
-    for (unsigned I = 0; I != NumElts; ++I)
-      if (DemandedElts[I])
-        DemandedSrcElts.setBit(Mask[I]);
-
-    return DAG.isGuaranteedNotToBeUndefOrPoison(
-        Op.getOperand(0), DemandedSrcElts, PoisonOnly, Depth + 1);
+    SmallVector<SDValue, 2> Ops;
+    if (getTargetShuffleMask(Op.getNode(), Op.getSimpleValueType(), true, Ops,
+                             Mask)) {
+      SmallVector<APInt, 2> DemandedSrcElts(Ops.size(),
+                                            APInt::getZero(NumElts));
+      for (auto M : enumerate(Mask)) {
+        if (!DemandedElts[M.index()] || M.value() == SM_SentinelZero)
+          continue;
+        if (M.value() == SM_SentinelUndef)
+          return false;
+        assert(0 <= M.value() && M.value() < (int)(Ops.size() * NumElts) &&
+               "Shuffle mask index out of range");
+        DemandedSrcElts[M.value() / NumElts].setBit(M.value() % NumElts);
+      }
+      for (auto Op : enumerate(Ops))
+        if (!DemandedSrcElts[Op.index()].isZero() &&
+            !DAG.isGuaranteedNotToBeUndefOrPoison(
+                Op.value(), DemandedSrcElts[Op.index()], PoisonOnly, Depth + 1))
+          return false;
+      return true;
+    }
+    break;
   }
   }
   return TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
@@ -47351,6 +47387,16 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
     return V;
 
+  APInt ShiftAmt;
+  if (supportedVectorVarShift(VT, Subtarget, ISD::SRA) &&
+      N1.getOpcode() == ISD::UMIN &&
+      ISD::isConstantSplatVector(N1.getOperand(1).getNode(), ShiftAmt) &&
+      ShiftAmt == VT.getScalarSizeInBits() - 1) {
+    SDValue ShrAmtVal = N1.getOperand(0);
+    SDLoc DL(N);
+    return DAG.getNode(X86ISD::VSRAV, DL, N->getVTList(), N0, ShrAmtVal);
+  }
+
   // fold (ashr (shl, a, [56,48,32,24,16]), SarConst)
   // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or
   // into (lshr, (sext (a), SarConst - [56,48,32,24,16]))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index fe1943b..0a1e8ca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -295,6 +295,10 @@ namespace llvm {
     // thunk at the address from an earlier relocation.
     TLSCALL,
 
+    // Thread Local Storage. A descriptor containing pointer to
+    // code and to argument to get the TLS offset for the symbol.
+    TLSDESC,
+
     // Exception Handling helpers.
     EH_RETURN,
 
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f393f86..ce3b6af 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -507,6 +507,16 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   Requires<[In64BitMode, NotLP64]>;
 }
 
+// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
+// stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
+  def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+                     "# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
+  def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+                     "# TLS_desc64", [(X86tlsdesc tls64addr:$sym)]>;
+}
+
 // Darwin TLS Support
 // For i386, the address of the thunk is passed on the stack, on return the
 // address of the variable is in %eax.  %ecx is trashed during the function
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index adf527d..f14c720 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -223,6 +223,9 @@ def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
 def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
+def X86tlsdesc : SDNode<"X86ISD::TLSDESC", SDT_X86TLSADDR,
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
 def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
                         [SDNPHasChain]>;
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b65f495..43ae6fd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10565,15 +10565,15 @@ bool X86InstrInfo::getMachineCombinerPatterns(
     bool DoRegPressureReduce) const {
   unsigned Opc = Root.getOpcode();
   switch (Opc) {
-  default:
-    return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
-                                                       DoRegPressureReduce);
   case X86::VPDPWSSDrr:
   case X86::VPDPWSSDrm:
   case X86::VPDPWSSDYrr:
   case X86::VPDPWSSDYrm: {
-    Patterns.push_back(MachineCombinerPattern::DPWSSD);
-    return true;
+    if (!Subtarget.hasFastDPWSSD()) {
+      Patterns.push_back(MachineCombinerPattern::DPWSSD);
+      return true;
+    }
+    break;
   }
   case X86::VPDPWSSDZ128r:
   case X86::VPDPWSSDZ128m:
@@ -10581,11 +10581,15 @@ bool X86InstrInfo::getMachineCombinerPatterns(
   case X86::VPDPWSSDZ256m:
   case X86::VPDPWSSDZr:
   case X86::VPDPWSSDZm: {
-    if (Subtarget.hasBWI())
+   if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
       Patterns.push_back(MachineCombinerPattern::DPWSSD);
-    return true;
+      return true;
+    }
+    break;
   }
   }
+  return TargetInstrInfo::getMachineCombinerPatterns(Root,
+                                                     Patterns, DoRegPressureReduce);
 }
 
 static void
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 7dd51ba..b8e7768 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -238,5 +238,6 @@ def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
 def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
 def HasFSRM : Predicate<"Subtarget->hasFSRM()">;
 def HasMFence    : Predicate<"Subtarget->hasMFence()">;
+def HasFastDPWSSD: Predicate<"Subtarget->hasFastDPWSSD()">;
 def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
 def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 64d4d41..e2330ff 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -519,10 +519,8 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                  const MachineInstr &MI) {
   NoAutoPaddingScope NoPadScope(*OutStreamer);
-  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
-                  MI.getOpcode() != X86::TLS_base_addr32;
-  bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
-                      MI.getOpcode() == X86::TLS_base_addr64;
+  bool Is64Bits = getSubtarget().is64Bit();
+  bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
   MCContext &Ctx = OutStreamer->getContext();
 
   MCSymbolRefExpr::VariantKind SRVK;
@@ -539,6 +537,10 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   case X86::TLS_base_addrX32:
     SRVK = MCSymbolRefExpr::VK_TLSLD;
     break;
+  case X86::TLS_desc32:
+  case X86::TLS_desc64:
+    SRVK = MCSymbolRefExpr::VK_TLSDESC;
+    break;
   default:
     llvm_unreachable("unexpected opcode");
   }
@@ -554,7 +556,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                 Ctx.getTargetOptions()->X86RelaxRelocations;
 
-  if (Is64Bits) {
+  if (SRVK == MCSymbolRefExpr::VK_TLSDESC) {
+    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
+        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
+        MCSymbolRefExpr::VK_TLSCALL, Ctx);
+    EmitAndCountInstruction(
+        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
+            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+            .addReg(Is64Bits ? X86::RIP : X86::EBX)
+            .addImm(1)
+            .addReg(0)
+            .addExpr(Sym)
+            .addReg(0));
+    EmitAndCountInstruction(
+        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
+            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+            .addImm(1)
+            .addReg(0)
+            .addExpr(Expr)
+            .addReg(0));
+  } else if (Is64Bits) {
     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
     if (NeedsPadding && Is64BitsLP64)
       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
@@ -2164,6 +2185,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
   case X86::TLS_base_addrX32:
+  case X86::TLS_desc32:
+  case X86::TLS_desc64:
     return LowerTlsAddr(MCInstLowering, *MI);
 
   case X86::MOVPC32r: {
diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h
index 0fd3e47..4e7ded1 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/llvm/lib/Target/X86/X86TargetMachine.h
@@ -60,7 +60,7 @@ public:
 
   Error buildCodeGenPipeline(ModulePassManager &, raw_pwrite_stream &,
                              raw_pwrite_stream *, CodeGenFileType,
-                             CGPassBuilderOption,
+                             const CGPassBuilderOption &,
                              PassInstrumentationCallbacks *) override;
 
   bool isJIT() const { return IsJIT; }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 1a5e6bc..23035f6 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -94,6 +94,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
       X86::TuningNoDomainDelayBlend,
       X86::TuningPreferShiftShuffle,
       X86::TuningFastImmVectorShift,
+      X86::TuningFastDPWSSD,
 
       // Perf-tuning flags.
       X86::TuningFastGather,
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 7c29d44..ae2a06a 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1925,7 +1925,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
       U->replaceUsesOfWith(Def, CurrentReload);
       // Instructions are added to Def's user list if the attached
       // debug records use Def. Update those now.
-      for (DPValue &DPV : DPValue::filter(U->getDbgRecordRange()))
+      for (DPValue &DPV : filterDbgVars(U->getDbgRecordRange()))
         DPV.replaceVariableLocationOp(Def, CurrentReload, true);
     }
   }
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 086971a1f..0fce596 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -684,7 +684,7 @@ collectDbgVariableIntrinsics(Function &F) {
   SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
   SmallVector<DPValue *> DPValues;
   for (auto &I : instructions(F)) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
       DPValues.push_back(&DPV);
     if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
       Intrinsics.push_back(DVI);
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index ed5352e..4ff14b6 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -643,7 +643,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
        BI != BIE; ++BI) {
     // Examine DPValues as they happen "before" the instruction. Are they
     // connected to parameters?
-    for (DPValue &DPV : DPValue::filter(BI->getDbgRecordRange())) {
+    for (DPValue &DPV : filterDbgVars(BI->getDbgRecordRange())) {
       if (DPV.isDbgValue() || DPV.isDbgAssign()) {
         ExamineDbgValue(&DPV, PDPVRelated);
       } else {
@@ -686,7 +686,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
 
   // Collect the set of unrelated instructions and debug records.
   for (Instruction &I : *GEntryBlock) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
       IsPDIRelated(&DPV, PDPVRelated, PDPVUnrelatedWL);
     IsPDIRelated(&I, PDIRelated, PDIUnrelatedWL);
   }
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index dd6062d..6b9a1f6 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -575,7 +575,7 @@ bool writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
 }
 
 } // anonymous namespace
-
+extern bool WriteNewDbgInfoFormatToBitcode;
 PreservedAnalyses
 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
   FunctionAnalysisManager &FAM =
@@ -583,8 +583,9 @@ llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
 
   // RemoveDIs: there's no bitcode representation of the DPValue debug-info,
   // convert to dbg.values before writing out.
-  bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
-  if (IsNewDbgInfoFormat)
+  bool ConvertToOldDbgFormatForWrite =
+      M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode;
+  if (ConvertToOldDbgFormatForWrite)
     M.convertFromNewDbgValues();
 
   bool Changed = writeThinLTOBitcode(
@@ -594,7 +595,7 @@ llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
       },
       M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
 
-  if (IsNewDbgInfoFormat)
+  if (ConvertToOldDbgFormatForWrite)
     M.convertToNewDbgValues();
 
   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 694b180..8537dbc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2289,13 +2289,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         default:
           llvm_unreachable("unexpected intrinsic ID");
         }
-        Instruction *NewCall = Builder.CreateBinaryIntrinsic(
+        Value *V = Builder.CreateBinaryIntrinsic(
             IID, X, ConstantFP::get(Arg0->getType(), Res), II);
         // TODO: Conservatively intersecting FMF. If Res == C2, the transform
         //       was a simplification (so Arg0 and its original flags could
         //       propagate?)
-        NewCall->andIRFlags(M);
-        return replaceInstUsesWith(*II, NewCall);
+        if (auto *CI = dyn_cast<CallInst>(V))
+          CI->andIRFlags(M);
+        return replaceInstUsesWith(*II, V);
       }
     }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 6a1ef6e..e2b744b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -98,6 +98,7 @@ public:
   Instruction *visitSub(BinaryOperator &I);
   Instruction *visitFSub(BinaryOperator &I);
   Instruction *visitMul(BinaryOperator &I);
+  Instruction *foldPowiReassoc(BinaryOperator &I);
   Instruction *foldFMulReassoc(BinaryOperator &I);
   Instruction *visitFMul(BinaryOperator &I);
   Instruction *visitURem(BinaryOperator &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 278be62..9d4c271 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -448,6 +448,14 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
   if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
     return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty));
 
+  // mul (sext X), Y -> select X, -Y, 0
+  // mul Y, (sext X) -> select X, -Y, 0
+  if (match(&I, m_c_Mul(m_OneUse(m_SExt(m_Value(X))), m_Value(Y))) &&
+      X->getType()->isIntOrIntVectorTy(1))
+    return SelectInst::Create(
+        X, Builder.CreateNeg(Y, "", /*HasNUW=*/false, I.hasNoSignedWrap()),
+        ConstantInt::getNullValue(Op0->getType()));
+
   Constant *ImmC;
   if (match(Op1, m_ImmConstant(ImmC))) {
     // (sext bool X) * C --> X ? -C : 0
@@ -571,6 +579,42 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
   return nullptr;
 }
 
+Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
+  auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
+                           Value *Y, Value *Z) {
+    InstCombiner::BuilderTy &Builder = IC.Builder;
+    Value *YZ = Builder.CreateAdd(Y, Z);
+    auto *NewPow = Builder.CreateIntrinsic(
+        Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
+    return IC.replaceInstUsesWith(I, NewPow);
+  };
+
+  Value *X, *Y, *Z;
+
+  // powi(X, Y) * X --> powi(X, Y+1)
+  // X * powi(X, Y) --> powi(X, Y+1)
+  if (match(&I, m_c_FMul(m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                             m_Value(X), m_Value(Y)))),
+                         m_Deferred(X)))) {
+    Constant *One = ConstantInt::get(Y->getType(), 1);
+    if (willNotOverflowSignedAdd(Y, One, I))
+      return createPowiExpr(I, *this, X, Y, One);
+  }
+
+  // powi(x, y) * powi(x, z) -> powi(x, y + z)
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  if (I.isOnlyUserOfAnyOperand() &&
+      match(Op0, m_AllowReassoc(
+                     m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
+      match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
+                                                             m_Value(Z)))) &&
+      Y->getType() == Z->getType())
+    return createPowiExpr(I, *this, X, Y, Z);
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0);
   Value *Op1 = I.getOperand(1);
@@ -683,6 +727,9 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
     return replaceInstUsesWith(I, Pow);
   }
 
+  if (Instruction *FoldedPowi = foldPowiReassoc(I))
+    return FoldedPowi;
+
   if (I.isOnlyUserOfAnyOperand()) {
     // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
     if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
@@ -699,16 +746,6 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
       return replaceInstUsesWith(I, NewPow);
     }
 
-    // powi(x, y) * powi(x, z) -> powi(x, y + z)
-    if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) &&
-        match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) &&
-        Y->getType() == Z->getType()) {
-      auto *YZ = Builder.CreateAdd(Y, Z);
-      auto *NewPow = Builder.CreateIntrinsic(
-          Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
-      return replaceInstUsesWith(I, NewPow);
-    }
-
     // exp(X) * exp(Y) -> exp(X + Y)
     if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
         match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aee18d7..ee76a62 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1191,7 +1191,7 @@ static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal,
                           match(RHS, m_NSWNeg(m_Specific(LHS)));
     Constant *IntMinIsPoisonC =
         ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison);
-    Instruction *Abs =
+    Value *Abs =
         IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
 
     if (SPF == SelectPatternFlavor::SPF_NABS)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c9bbe43..edb046de 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1481,6 +1481,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
 
   // If we have a constant rhs, see if we can losslessly convert it to an int.
   if (Op1FpC != nullptr) {
+    // Signed + Mul req non-zero
+    if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
+        !match(Op1FpC, m_NonZeroFP()))
+      return nullptr;
+
     Constant *Op1IntC = ConstantFoldCastOperand(
         OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
         IntTy, DL);
@@ -2596,6 +2601,45 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
     return nullptr;
   }
 
+  // De Morgan's Laws:
+  // (~(A | B)) -> (~A & ~B)
+  // (~(A & B)) -> (~A | ~B)
+  auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
+                                         bool IsLogical, Value *A,
+                                         Value *B) -> Value * {
+    bool LocalDoesConsume = DoesConsume;
+    if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
+                               LocalDoesConsume, Depth))
+      return nullptr;
+    if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+                                           LocalDoesConsume, Depth)) {
+      auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+                                         LocalDoesConsume, Depth);
+      DoesConsume = LocalDoesConsume;
+      if (IsLogical)
+        return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
+      return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
+    }
+
+    return nullptr;
+  };
+
+  if (match(V, m_Or(m_Value(A), m_Value(B))))
+    return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
+                                       B);
+
+  if (match(V, m_And(m_Value(A), m_Value(B))))
+    return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
+                                       B);
+
+  if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
+    return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
+                                       B);
+
+  if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
+    return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
+                                       B);
+
   return nullptr;
 }
 
@@ -3834,6 +3878,12 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
     if (Instruction *Res = foldOpIntoPhi(EV, PN))
       return Res;
 
+  // Canonicalize extract (select Cond, TV, FV)
+  // -> select cond, (extract TV), (extract FV)
+  if (auto *SI = dyn_cast<SelectInst>(Agg))
+    if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
+      return R;
+
   // We could simplify extracts from other values. Note that nested extracts may
   // already be simplified implicitly by the above: extract (extract (insert) )
   // will be translated into extract ( insert ( extract ) ) first and then just
@@ -4697,7 +4747,7 @@ void InstCombinerImpl::tryToSinkInstructionDPValues(
     // latest assignment.
     for (const Instruction *Inst : DupSet) {
       for (DPValue &DPV :
-           llvm::reverse(DPValue::filter(Inst->getDbgRecordRange()))) {
+           llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
         DebugVariable DbgUserVariable =
             DebugVariable(DPV.getVariable(), DPV.getExpression(),
                           DPV.getDebugLoc()->getInlinedAt());
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 87584da..9c814e1 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1381,11 +1381,6 @@ bool HWAddressSanitizer::instrumentLandingPads(
   return true;
 }
 
-static bool isLifetimeIntrinsic(Value *V) {
-  auto *II = dyn_cast<IntrinsicInst>(V);
-  return II && II->isLifetimeStartOrEnd();
-}
-
 static DbgAssignIntrinsic *DynCastToDbgAssign(DbgVariableIntrinsic *DVI) {
   return dyn_cast<DbgAssignIntrinsic>(DVI);
 }
@@ -1445,7 +1440,8 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
 
     AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
       auto *User = U.getUser();
-      return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
+      return User != AILong && User != AICast &&
+             !memtag::isLifetimeIntrinsic(User);
     });
 
     // Helper utility for adding DW_OP_LLVM_tag_offset to debug-info records,
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 8ee0bca..956ebe8 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -43,7 +43,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/EscapeEnumerator.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -738,8 +737,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
     Value *Args[] = {Addr,
                      IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty),
                      createOrdering(&IRB, SI->getOrdering())};
-    CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
-    ReplaceInstWithInst(I, C);
+    IRB.CreateCall(TsanAtomicStore[Idx], Args);
+    SI->eraseFromParent();
   } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
     Value *Addr = RMWI->getPointerOperand();
     int Idx =
@@ -752,11 +751,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
     const unsigned ByteSize = 1U << Idx;
     const unsigned BitSize = ByteSize * 8;
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
-    Value *Args[] = {Addr,
-                     IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+    Value *Val = RMWI->getValOperand();
+    Value *Args[] = {Addr, IRB.CreateBitOrPointerCast(Val, Ty),
                      createOrdering(&IRB, RMWI->getOrdering())};
-    CallInst *C = CallInst::Create(F, Args);
-    ReplaceInstWithInst(I, C);
+    Value *C = IRB.CreateCall(F, Args);
+    I->replaceAllUsesWith(IRB.CreateBitOrPointerCast(C, Val->getType()));
+    I->eraseFromParent();
   } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
     Value *Addr = CASI->getPointerOperand();
     Type *OrigOldValTy = CASI->getNewValOperand()->getType();
@@ -794,8 +794,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
     FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread
                            ? TsanAtomicSignalFence
                            : TsanAtomicThreadFence;
-    CallInst *C = CallInst::Create(F, Args);
-    ReplaceInstWithInst(I, C);
+    IRB.CreateCall(F, Args);
+    FI->eraseFromParent();
   }
   return true;
 }
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 85d4065..e66a1a2 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CFG.h"
@@ -131,9 +132,9 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
 
 class DFAJumpThreading {
 public:
-  DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
+  DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
                    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
-      : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
+      : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
 
   bool run(Function &F);
 
@@ -161,6 +162,7 @@ private:
 
   AssumptionCache *AC;
   DominatorTree *DT;
+  LoopInfo *LI;
   TargetTransformInfo *TTI;
   OptimizationRemarkEmitter *ORE;
 };
@@ -378,7 +380,8 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
 #endif
 
 struct MainSwitch {
-  MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
+  MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
+      : LI(LI) {
     if (isCandidate(SI)) {
       Instr = SI;
     } else {
@@ -411,6 +414,10 @@ private:
     if (!isa<PHINode>(SICond))
       return false;
 
+    // The switch must be in a loop.
+    if (!LI->getLoopFor(SI->getParent()))
+      return false;
+
     addToQueue(SICond, Q, SeenValues);
 
     while (!Q.empty()) {
@@ -488,6 +495,7 @@ private:
     return true;
   }
 
+  LoopInfo *LI;
   SwitchInst *Instr = nullptr;
   SmallVector<SelectInstToUnfold, 4> SelectInsts;
 };
@@ -1262,7 +1270,7 @@ bool DFAJumpThreading::run(Function &F) {
 
     LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
                       << " is a candidate\n");
-    MainSwitch Switch(SI, ORE);
+    MainSwitch Switch(SI, LI, ORE);
 
     if (!Switch.getInstr())
       continue;
@@ -1315,10 +1323,11 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
   OptimizationRemarkEmitter ORE(&F);
 
-  if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
+  if (!DFAJumpThreading(&AC, &DT, &LI, &TTI, &ORE).run(F))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1058a01..5d7b050e 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -401,7 +401,7 @@ static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
     Changed |= replaceNonLocalUsesWith(Cond, ToVal);
   for (Instruction &I : reverse(*KnownAtEndOfBB)) {
     // Replace any debug-info record users of Cond with ToVal.
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
       DPV.replaceVariableLocationOp(Cond, ToVal, true);
 
     // Reached the Cond whose uses we are trying to replace, so there are no
@@ -2081,7 +2081,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
 
   auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
     auto DPVRange = NewInst->cloneDebugInfoFrom(From);
-    for (DPValue &DPV : DPValue::filter(DPVRange))
+    for (DPValue &DPV : filterDbgVars(DPVRange))
       RetargetDPValueIfPossible(&DPV);
   };
 
@@ -2116,7 +2116,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
     DPMarker *Marker = RangeBB->getMarker(BE);
     DPMarker *EndMarker = NewBB->createMarker(NewBB->end());
     auto DPVRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
-    for (DPValue &DPV : DPValue::filter(DPVRange))
+    for (DPValue &DPV : filterDbgVars(DPVRange))
       RetargetDPValueIfPossible(&DPV);
   }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index c4e1a0d..7641ba2 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6711,7 +6711,7 @@ static void DbgGatherSalvagableDVI(
         SalvageableDVISCEVs.push_back(std::move(NewRec));
         return true;
       };
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
         if (DPV.isDbgValue() || DPV.isDbgAssign())
           ProcessDbgValue(&DPV);
       }
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 8686570..44ea6e9 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -293,7 +293,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
   for (const auto &I : FromBlock) {
     // Make note of any DPValues that need hoisting. DPLabels
     // get left behind just like llvm.dbg.labels.
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
       if (HasNoUnhoistedInstr(DPV.location_ops()))
         DPValuesToHoist[DPV.getInstruction()].push_back(&DPV);
     }
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2006b40..e7ad18d 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -504,7 +504,7 @@ static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
   DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
       VariableMap;
   for (auto &I : *BB) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
       if (DPV.getType() == DPValue::LocationType::Declare)
         continue;
       DebugVariable Key(DPV.getVariable(), std::nullopt,
@@ -553,7 +553,7 @@ static bool DPValuesRemoveUndefDbgAssignsFromEntryBlock(BasicBlock *BB) {
   // Remove undef dbg.assign intrinsics that are encountered before
   // any non-undef intrinsics from the entry block.
   for (auto &I : *BB) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
       if (!DPV.isDbgValue() && !DPV.isDbgAssign())
         continue;
       bool IsDbgValueKind =
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 1bbe76a..33d3272 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1829,7 +1829,7 @@ static void fixupAssignments(Function::iterator Start, Function::iterator End) {
   // attachment or use, replace it with a new version.
   for (auto BBI = Start; BBI != End; ++BBI) {
     for (Instruction &I : *BBI) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+      for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
         if (DPV.isDbgAssign())
           DPV.setAssignId(GetNewID(DPV.getAssignID()));
       }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index a87e5a3..422319e 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1911,7 +1911,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
     for (Instruction &BI : FI) {
       if (auto *DDI = dyn_cast<DbgDeclareInst>(&BI))
         Dbgs.push_back(DDI);
-      for (DPValue &DPV : DPValue::filter(BI.getDbgRecordRange())) {
+      for (DPValue &DPV : filterDbgVars(BI.getDbgRecordRange())) {
         if (DPV.getType() == DPValue::LocationType::Declare)
           DPVs.push_back(&DPV);
       }
@@ -1996,7 +1996,7 @@ static void insertDPValuesForPHIs(BasicBlock *BB,
   // Map existing PHI nodes to their DPValues.
   DenseMap<Value *, DPValue *> DbgValueMap;
   for (auto &I : *BB) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
+    for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange())) {
       for (Value *V : DPV.location_ops())
         if (auto *Loc = dyn_cast_or_null<PHINode>(V))
           DbgValueMap.insert({Loc, &DPV});
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index acfd87c..980ecf0 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -554,7 +554,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
         DbgIntrinsics.insert(makeHash(DII));
         // Until RemoveDIs supports dbg.declares in DPValue format, we'll need
         // to collect DPValues attached to any other debug intrinsics.
-        for (const DPValue &DPV : DPValue::filter(DII->getDbgRecordRange()))
+        for (const DPValue &DPV : filterDbgVars(DII->getDbgRecordRange()))
           DbgIntrinsics.insert(makeHash(&DPV));
       } else {
         break;
@@ -564,7 +564,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     // Build DPValue hashes for DPValues attached to the terminator, which isn't
     // considered in the loop above.
     for (const DPValue &DPV :
-         DPValue::filter(OrigPreheader->getTerminator()->getDbgRecordRange()))
+         filterDbgVars(OrigPreheader->getTerminator()->getDbgRecordRange()))
       DbgIntrinsics.insert(makeHash(&DPV));
 
     // Remember the local noalias scope declarations in the header. After the
@@ -631,7 +631,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
                             RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
           // Erase anything we've seen before.
           for (DPValue &DPV :
-               make_early_inc_range(DPValue::filter(DbgValueRange)))
+               make_early_inc_range(filterDbgVars(DbgValueRange)))
             if (DbgIntrinsics.count(makeHash(&DPV)))
               DPV.eraseFromParent();
         }
@@ -657,7 +657,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
                           RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
         NextDbgInsts = DPMarker::getEmptyDbgRecordRange();
         // Erase anything we've seen before.
-        for (DPValue &DPV : make_early_inc_range(DPValue::filter(Range)))
+        for (DPValue &DPV : make_early_inc_range(filterDbgVars(Range)))
           if (DbgIntrinsics.count(makeHash(&DPV)))
             DPV.eraseFromParent();
       }
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 05b02a4..0748b9d 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -634,7 +634,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
         // RemoveDIs: do the same as below for DPValues.
         if (Block->IsNewDbgInfoFormat) {
           for (DPValue &DPV : llvm::make_early_inc_range(
-                   DPValue::filter(I.getDbgRecordRange()))) {
+                   filterDbgVars(I.getDbgRecordRange()))) {
             DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
                               DPV.getDebugLoc().get());
             if (!DeadDebugSet.insert(Key).second)
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index ed06d3e..cab1958 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -110,7 +110,7 @@ Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) {
 
 void StackInfoBuilder::visit(Instruction &Inst) {
   // Visit non-intrinsic debug-info records attached to Inst.
-  for (DPValue &DPV : DPValue::filter(Inst.getDbgRecordRange())) {
+  for (DPValue &DPV : filterDbgVars(Inst.getDbgRecordRange())) {
     auto AddIfInteresting = [&](Value *V) {
       if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
         if (!isInterestingAlloca(*AI))
@@ -236,5 +236,10 @@ void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) {
   Info.AI = NewAI;
 }
 
+bool isLifetimeIntrinsic(Value *V) {
+  auto *II = dyn_cast<IntrinsicInst>(V);
+  return II && II->isLifetimeStartOrEnd();
+}
+
 } // namespace memtag
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index a185e8c..38fc776 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -1486,7 +1486,13 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
   // Try to simplify to a constant range.
   ConstantRange A = getConstantRange(V1State, I.getType());
   ConstantRange B = getConstantRange(V2State, I.getType());
-  ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B);
+
+  auto *BO = cast<BinaryOperator>(&I);
+  ConstantRange R = ConstantRange::getEmpty(I.getType()->getScalarSizeInBits());
+  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO))
+    R = A.overflowingBinaryOp(BO->getOpcode(), B, OBO->getNoWrapKind());
+  else
+    R = A.binaryOp(BO->getOpcode(), B);
   mergeInValue(&I, ValueLatticeElement::getRange(R));
 
   // TODO: Currently we do not exploit special values that produce something
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6d2a6a3..292ad78 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3850,7 +3850,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
   if (PredBlock->IsNewDbgInfoFormat) {
     PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
     for (DPValue &DPV :
-         DPValue::filter(PredBlock->getTerminator()->getDbgRecordRange())) {
+         filterDbgVars(PredBlock->getTerminator()->getDbgRecordRange())) {
       RemapDPValue(M, &DPV, VMap,
                    RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
     }
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index b8fa985..f6d7226 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1381,6 +1381,77 @@ const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
   };
 }
 
+namespace {
+
+// Represents a interesting integer binary operation for
+// getExtendedOperandRecurrence. This may be a shl that is being treated as a
+// multiply or a 'or disjoint' that is being treated as 'add nsw nuw'.
+struct BinaryOp {
+  unsigned Opcode;
+  std::array<Value *, 2> Operands;
+  bool IsNSW = false;
+  bool IsNUW = false;
+
+  explicit BinaryOp(Instruction *Op)
+      : Opcode(Op->getOpcode()),
+        Operands({Op->getOperand(0), Op->getOperand(1)}) {
+    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
+      IsNSW = OBO->hasNoSignedWrap();
+      IsNUW = OBO->hasNoUnsignedWrap();
+    }
+  }
+
+  explicit BinaryOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+                    bool IsNSW = false, bool IsNUW = false)
+      : Opcode(Opcode), Operands({LHS, RHS}), IsNSW(IsNSW), IsNUW(IsNUW) {}
+};
+
+} // end anonymous namespace
+
+static std::optional<BinaryOp> matchBinaryOp(Instruction *Op) {
+  switch (Op->getOpcode()) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    return BinaryOp(Op);
+  case Instruction::Or: {
+    // Convert or disjoint into add nuw nsw.
+    if (cast<PossiblyDisjointInst>(Op)->isDisjoint())
+      return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1),
+                      /*IsNSW=*/true, /*IsNUW=*/true);
+    break;
+  }
+  case Instruction::Shl: {
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
+      unsigned BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
+
+      // If the shift count is not less than the bitwidth, the result of
+      // the shift is undefined. Don't try to analyze it, because the
+      // resolution chosen here may differ from the resolution chosen in
+      // other parts of the compiler.
+      if (SA->getValue().ult(BitWidth)) {
+        // We can safely preserve the nuw flag in all cases. It's also safe to
+        // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
+        // requires special handling. It can be preserved as long as we're not
+        // left shifting by bitwidth - 1.
+        bool IsNUW = Op->hasNoUnsignedWrap();
+        bool IsNSW = Op->hasNoSignedWrap() &&
+                     (IsNUW || SA->getValue().ult(BitWidth - 1));
+
+        ConstantInt *X =
+            ConstantInt::get(Op->getContext(),
+                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
+        return BinaryOp(Instruction::Mul, Op->getOperand(0), X, IsNSW, IsNUW);
+      }
+    }
+
+    break;
+  }
+  }
+
+  return std::nullopt;
+}
+
 /// No-wrap operations can transfer sign extension of their result to their
 /// operands. Generate the SCEV value for the widened operation without
 /// actually modifying the IR yet. If the expression after extending the
@@ -1388,24 +1459,22 @@ const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
 /// extension used.
 WidenIV::WidenedRecTy
 WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
-  // Handle the common case of add<nsw/nuw>
-  const unsigned OpCode = DU.NarrowUse->getOpcode();
-  // Only Add/Sub/Mul instructions supported yet.
-  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
-      OpCode != Instruction::Mul)
+  auto Op = matchBinaryOp(DU.NarrowUse);
+  if (!Op)
     return {nullptr, ExtendKind::Unknown};
 
+  assert((Op->Opcode == Instruction::Add || Op->Opcode == Instruction::Sub ||
+          Op->Opcode == Instruction::Mul) &&
+         "Unexpected opcode");
+
   // One operand (NarrowDef) has already been extended to WideDef. Now determine
   // if extending the other will lead to a recurrence.
-  const unsigned ExtendOperIdx =
-      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
-  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+  const unsigned ExtendOperIdx = Op->Operands[0] == DU.NarrowDef ? 1 : 0;
+  assert(Op->Operands[1 - ExtendOperIdx] == DU.NarrowDef && "bad DU");
 
-  const OverflowingBinaryOperator *OBO =
-    cast<OverflowingBinaryOperator>(DU.NarrowUse);
   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
-  if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) &&
-      !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) {
+  if (!(ExtKind == ExtendKind::Sign && Op->IsNSW) &&
+      !(ExtKind == ExtendKind::Zero && Op->IsNUW)) {
     ExtKind = ExtendKind::Unknown;
 
     // For a non-negative NarrowDef, we can choose either type of
@@ -1413,16 +1482,15 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
     // (see above), and we only hit this code if we need to check
     // the opposite case.
     if (DU.NeverNegative) {
-      if (OBO->hasNoSignedWrap()) {
+      if (Op->IsNSW) {
         ExtKind = ExtendKind::Sign;
-      } else if (OBO->hasNoUnsignedWrap()) {
+      } else if (Op->IsNUW) {
         ExtKind = ExtendKind::Zero;
       }
     }
   }
 
-  const SCEV *ExtendOperExpr =
-      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx));
+  const SCEV *ExtendOperExpr = SE->getSCEV(Op->Operands[ExtendOperIdx]);
   if (ExtKind == ExtendKind::Sign)
     ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
   else if (ExtKind == ExtendKind::Zero)
@@ -1443,7 +1511,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
   if (ExtendOperIdx == 0)
     std::swap(lhs, rhs);
   const SCEVAddRecExpr *AddRec =
-      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
+      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, Op->Opcode));
 
   if (!AddRec || AddRec->getLoop() != L)
     return {nullptr, ExtendKind::Unknown};
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index abb7a44..d037742 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -1239,7 +1239,7 @@ void ValueMapper::remapDPValue(Module *M, DPValue &V) {
 
 void ValueMapper::remapDPValueRange(
     Module *M, iterator_range<DbgRecord::self_iterator> Range) {
-  for (DPValue &DPV : DPValue::filter(Range)) {
+  for (DPValue &DPV : filterDbgVars(Range)) {
     remapDPValue(M, DPV);
   }
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index edaad4d..52b992b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6911,25 +6911,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
       Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
 
     SmallVector<const Value *, 4> Operands(I->operand_values());
-    auto InstrCost = TTI.getArithmeticInstrCost(
+    return TTI.getArithmeticInstrCost(
         I->getOpcode(), VectorTy, CostKind,
         {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
-        Op2Info, Operands, I);
-
-    // Some targets can replace frem with vector library calls.
-    InstructionCost VecCallCost = InstructionCost::getInvalid();
-    if (I->getOpcode() == Instruction::FRem) {
-      LibFunc Func;
-      if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func) &&
-          TLI->isFunctionVectorizable(TLI->getName(Func), VF)) {
-        SmallVector<Type *, 4> OpTypes;
-        for (auto &Op : I->operands())
-          OpTypes.push_back(Op->getType());
-        VecCallCost =
-            TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind);
-      }
-    }
-    return std::min(InstrCost, VecCallCost);
+        Op2Info, Operands, I, TLI);
   }
   case Instruction::FNeg: {
     return TTI.getArithmeticInstrCost(
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 739dae3..6dbd540 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4295,9 +4295,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
               llvm_unreachable(
                   "Expected only consecutive, strided or masked gather loads.");
             }
+            SmallVector<int> ShuffleMask(VL.size());
+            for (int Idx : seq<int>(0, VL.size()))
+              ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
             VecLdCost +=
                 TTI.getShuffleCost(TTI ::SK_InsertSubvector, VecTy,
-                                   std::nullopt, CostKind, I * VF, SubVecTy);
+                                   ShuffleMask, CostKind, I * VF, SubVecTy);
           }
           // If masked gather cost is higher - better to vectorize, so
           // consider it as a gather node. It will be better estimated
@@ -4313,9 +4316,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
     // increases the cost.
     Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
     bool ProfitableGatherPointers =
-        L && Sz > 2 && count_if(PointerOps, [L](Value *V) {
-                         return L->isLoopInvariant(V);
-                       }) <= Sz / 2;
+        L && Sz > 2 &&
+        static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
+          return L->isLoopInvariant(V);
+        })) <= Sz / 2;
     if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
           auto *GEP = dyn_cast<GetElementPtrInst>(P);
           return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||
@@ -7400,7 +7404,7 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
         Index + NumSrcElts <= static_cast<int>(Mask.size()))
       return TTI.getShuffleCost(
           TTI::SK_InsertSubvector,
-          FixedVectorType::get(Tp->getElementType(), Mask.size()), std::nullopt,
+          FixedVectorType::get(Tp->getElementType(), Mask.size()), Mask,
           TTI::TCK_RecipThroughput, Index, Tp);
   }
   return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
@@ -7673,9 +7677,13 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         }
         if (NeedInsertSubvectorAnalysis) {
           // Add the cost for the subvectors insert.
-          for (int I = VF, E = VL.size(); I < E; I += VF)
+          SmallVector<int> ShuffleMask(VL.size());
+          for (unsigned I = VF, E = VL.size(); I < E; I += VF) {
+            for (unsigned Idx : seq<unsigned>(0, E))
+              ShuffleMask[Idx] = Idx / VF == I ? E + Idx % VF : Idx;
             GatherCost += TTI.getShuffleCost(TTI::SK_InsertSubvector, VecTy,
-                                             std::nullopt, CostKind, I, LoadTy);
+                                             ShuffleMask, CostKind, I, LoadTy);
+          }
         }
         GatherCost -= ScalarsCost;
       }
@@ -7690,16 +7698,22 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       bool NeedShuffle =
           count(VL, *It) > 1 &&
           (VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof));
+      if (!NeedShuffle)
+        return TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
+                                      CostKind, std::distance(VL.begin(), It),
+                                      PoisonValue::get(VecTy), *It);
+
+      SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
+      transform(VL, ShuffleMask.begin(), [](Value *V) {
+        return isa<PoisonValue>(V) ? PoisonMaskElem : 0;   
+      });
       InstructionCost InsertCost = TTI.getVectorInstrCost(
-          Instruction::InsertElement, VecTy, CostKind,
-          NeedShuffle ? 0 : std::distance(VL.begin(), It),
+          Instruction::InsertElement, VecTy, CostKind, 0,
           PoisonValue::get(VecTy), *It);
       return InsertCost +
-             (NeedShuffle ? TTI.getShuffleCost(
-                                TargetTransformInfo::SK_Broadcast, VecTy,
-                                /*Mask=*/std::nullopt, CostKind, /*Index=*/0,
-                                /*SubTp=*/nullptr, /*Args=*/*It)
-                          : TTI::TCC_Free);
+             TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
+                                ShuffleMask, CostKind, /*Index=*/0,
+                                /*SubTp=*/nullptr, /*Args=*/*It);
     }
     return GatherCost +
            (all_of(Gathers, UndefValue::classof)
@@ -8852,7 +8866,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0));
       TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx));
       return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info,
-                                         Op2Info) +
+                                         Op2Info, std::nullopt, nullptr, TLI) +
              CommonCost;
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -10374,7 +10388,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
   // Check if the same elements are inserted several times and count them as
   // shuffle candidates.
   APInt ShuffledElements = APInt::getZero(VL.size());
-  DenseSet<Value *> UniqueElements;
+  DenseMap<Value *, unsigned> UniqueElements;
   constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost Cost;
   auto EstimateInsertCost = [&](unsigned I, Value *V) {
@@ -10383,27 +10397,34 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
           TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
                                   I, Constant::getNullValue(VecTy), V);
   };
+  SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
   for (unsigned I = 0, E = VL.size(); I < E; ++I) {
     Value *V = VL[I];
     // No need to shuffle duplicates for constants.
     if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
       ShuffledElements.setBit(I);
+      ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
       continue;
     }
-    if (!UniqueElements.insert(V).second) {
-      DuplicateNonConst = true;
-      ShuffledElements.setBit(I);
+
+    auto Res = UniqueElements.try_emplace(V, I);
+    if (Res.second) {
+      EstimateInsertCost(I, V);
+      ShuffleMask[I] = I;
       continue;
     }
-    EstimateInsertCost(I, V);
+
+    DuplicateNonConst = true;
+    ShuffledElements.setBit(I);
+    ShuffleMask[I] = Res.first->second;
   }
   if (ForPoisonSrc)
     Cost =
         TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
                                       /*Extract*/ false, CostKind);
   if (DuplicateNonConst)
-    Cost +=
-        TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+    Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                VecTy, ShuffleMask);
   return Cost;
 }
 
@@ -14708,13 +14729,13 @@ class HorizontalReduction {
       if (IsConstant)
         return ConstantFP::get(LHS->getType(),
                                maximum(cast<ConstantFP>(LHS)->getValueAPF(),
-                                      cast<ConstantFP>(RHS)->getValueAPF()));
+                                       cast<ConstantFP>(RHS)->getValueAPF()));
       return Builder.CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS);
     case RecurKind::FMinimum:
       if (IsConstant)
         return ConstantFP::get(LHS->getType(),
                                minimum(cast<ConstantFP>(LHS)->getValueAPF(),
-                                      cast<ConstantFP>(RHS)->getValueAPF()));
+                                       cast<ConstantFP>(RHS)->getValueAPF()));
       return Builder.CreateBinaryIntrinsic(Intrinsic::minimum, LHS, RHS);
     case RecurKind::SMax:
       if (IsConstant || UseSelect) {
diff --git a/llvm/test/Bitcode/dbg-record-roundtrip.ll b/llvm/test/Bitcode/dbg-record-roundtrip.ll
new file mode 100644
index 0000000..84606b5
--- /dev/null
+++ b/llvm/test/Bitcode/dbg-record-roundtrip.ll
@@ -0,0 +1,138 @@
+;; Roundtrip tests.
+; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=true %s -o - | llvm-dis | FileCheck %s
+;; Check that verify-uselistorder passes regardless of input format.
+; RUN: llvm-as %s --write-experimental-debuginfo-iterators-to-bitcode=true -o - | verify-uselistorder
+; RUN: verify-uselistorder %s
+
+;; Confirm we're producing RemoveDI records from various tools.
+; RUN: opt %s -o - --write-experimental-debuginfo-iterators-to-bitcode=true | llvm-bcanalyzer - | FileCheck %s --check-prefix=BITCODE
+; RUN: llvm-as %s -o - --write-experimental-debuginfo-iterators-to-bitcode=true | llvm-bcanalyzer - | FileCheck %s --check-prefix=BITCODE
+; BITCODE-DAG: DEBUG_RECORD_LABEL
+; BITCODE-DAG: DEBUG_RECORD_VALUE
+; BITCODE-DAG: DEBUG_RECORD_ASSIGN
+; BITCODE-DAG: DEBUG_RECORD_DECLARE
+
+;; Check that llvm-link doesn't explode if we give it different formats to
+;; link.
+;; NOTE: This test fails intermittently on linux if the llvm-as output is piped
+;; into llvm-link in the RUN lines below, unless the verify-uselistorder RUN
+;; lines above are removed. Write to a temporary file to avoid that weirdness.
+;; NOTE2: Unfortunately, the above only stopped it occuring on my machine.
+;; It failed again intermittently here:
+;; https://lab.llvm.org/buildbot/#/builders/245/builds/21930
+;; Allow this test to fail-over twice, until this strangeness is understood.
+; ALLOW_RETRIES: 2
+; RUN: llvm-as %s --experimental-debuginfo-iterators=true --write-experimental-debuginfo-iterators-to-bitcode=true -o %t
+; RUN: llvm-link %t %s --experimental-debuginfo-iterators=false -o /dev/null
+; RUN: llvm-as %s --experimental-debuginfo-iterators=false -o %t
+; RUN: llvm-link %t %s --experimental-debuginfo-iterators=true
+
+;; Checks inline.
+
+@g = internal dso_local global i32 0, align 4, !dbg !0
+
+define internal dso_local noundef i32 @_Z3funv(i32 %p, ptr %storage) !dbg !13 {
+entry:
+;; Dbg record at top of block, check dbg.value configurations.
+; CHECK: entry:
+; CHECK-NEXT: dbg.value(metadata i32 %p, metadata ![[e:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg:[0-9]+]]
+; CHECK-NEXT: dbg.value(metadata ![[empty:[0-9]+]], metadata ![[e]], metadata !DIExpression()), !dbg ![[dbg]]
+; CHECK-NEXT: dbg.value(metadata i32 poison, metadata ![[e]], metadata !DIExpression()), !dbg ![[dbg]]
+; CHECK-NEXT: dbg.value(metadata i32 1, metadata ![[f:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+  tail call void @llvm.dbg.value(metadata i32 %p, metadata !32, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.value(metadata !29, metadata !32, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 poison, metadata !32, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 1, metadata !33, metadata !DIExpression()), !dbg !19
+;; Arglist with an argument, constant, local use before def, poison.
+; CHECK-NEXT: dbg.value(metadata !DIArgList(i32 %p, i32 0, i32 %0, i32 poison), metadata ![[f]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_minus)), !dbg ![[dbg]]
+  tail call void @llvm.dbg.value(metadata !DIArgList(i32 %p, i32 0, i32 %0, i32 poison), metadata !33, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_minus)), !dbg !19
+;; Check dbg.assign use before def (value, addr and ID). Check expression order too.
+; CHECK: dbg.assign(metadata i32 %0, metadata ![[i:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 0),
+; CHECK-SAME:       metadata ![[ID:[0-9]+]], metadata ptr %a, metadata !DIExpression(DW_OP_plus_uconst, 1)), !dbg ![[dbg]]
+  tail call void @llvm.dbg.assign(metadata i32 %0, metadata !36, metadata !DIExpression(DW_OP_plus_uconst, 0), metadata !37, metadata ptr %a, metadata !DIExpression(DW_OP_plus_uconst, 1)), !dbg !19
+  %a = alloca i32, align 4, !DIAssignID !37
+; CHECK: %a = alloca i32, align 4, !DIAssignID ![[ID]]
+;; Check dbg.declare configurations.
+; CHECK-NEXT: dbg.declare(metadata ptr %a, metadata ![[a:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+; CHECK-NEXT: dbg.declare(metadata ![[empty:[0-9]+]], metadata ![[b:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+; CHECK-NEXT: dbg.declare(metadata ptr poison, metadata ![[c:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+; CHECK-NEXT: dbg.declare(metadata ptr null, metadata ![[d:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+; CHECK-NEXT: dbg.declare(metadata ptr @g, metadata ![[h:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+  tail call void @llvm.dbg.declare(metadata ptr %a, metadata !17, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.declare(metadata !29, metadata !28, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.declare(metadata ptr poison, metadata !30, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.declare(metadata ptr null, metadata !31, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.declare(metadata ptr @g, metadata !35, metadata !DIExpression()), !dbg !19
+;; Argument value dbg.declare.
+; CHECK: dbg.declare(metadata ptr %storage, metadata ![[g:[0-9]+]], metadata !DIExpression()), !dbg ![[dbg]]
+  tail call void @llvm.dbg.declare(metadata ptr %storage, metadata !34, metadata !DIExpression()), !dbg !19
+;; Use before def dbg.value.
+; CHECK: dbg.value(metadata i32 %0, metadata ![[e]], metadata !DIExpression()), !dbg ![[dbg]]
+  tail call void @llvm.dbg.value(metadata i32 %0, metadata !32, metadata !DIExpression()), !dbg !19
+  %0 = load i32, ptr @g, align 4, !dbg !20
+;; Non-argument local value dbg.value.
+; CHECK: dbg.value(metadata i32 %0, metadata ![[e]], metadata !DIExpression()), !dbg ![[dbg]]
+  tail call void @llvm.dbg.value(metadata i32 %0, metadata !32, metadata !DIExpression()), !dbg !19
+  store i32 %0, ptr %a, align 4, !dbg !19
+  %1 = load i32, ptr %a, align 4, !dbg !25
+; CHECK: dbg.label(metadata ![[label:[0-9]+]]), !dbg ![[dbg]]
+  tail call void @llvm.dbg.label(metadata !38), !dbg !19
+  ret i32 %1, !dbg !27
+}
+
+; CHECK-DAG: ![[a]] = !DILocalVariable(name: "a",
+; CHECK-DAG: ![[b]] = !DILocalVariable(name: "b",
+; CHECK-DAG: ![[c]] = !DILocalVariable(name: "c",
+; CHECK-DAG: ![[d]] = !DILocalVariable(name: "d",
+; CHECK-DAG: ![[e]] = !DILocalVariable(name: "e",
+; CHECK-DAG: ![[f]] = !DILocalVariable(name: "f",
+; CHECK-DAG: ![[g]] = !DILocalVariable(name: "g",
+; CHECK-DAG: ![[h]] = !DILocalVariable(name: "h",
+; CHECK-DAG: ![[i]] = !DILocalVariable(name: "i",
+; CHECK-DAG: ![[empty]] = !{}
+; CHECK-DAG: ![[label]] = !DILabel
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
+declare void @llvm.dbg.label(metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test.cpp", directory: "/")
+!4 = !{!0}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{i32 7, !"Dwarf Version", i32 5}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{i32 8, !"PIC Level", i32 2}
+!10 = !{i32 7, !"PIE Level", i32 2}
+!11 = !{i32 7, !"uwtable", i32 2}
+!12 = !{!"clang version 19.0.0"}
+!13 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !3, file: !3, line: 2, type: !14, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !16)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!5}
+!16 = !{!17}
+!17 = !DILocalVariable(name: "a", scope: !13, file: !3, line: 3, type: !5)
+!18 = !DILocation(line: 3, column: 3, scope: !13)
+!19 = !DILocation(line: 3, column: 7, scope: !13)
+!20 = !DILocation(line: 3, column: 11, scope: !13)
+!25 = !DILocation(line: 4, column: 12, scope: !13)
+!26 = !DILocation(line: 5, column: 1, scope: !13)
+!27 = !DILocation(line: 4, column: 5, scope: !13)
+!28 = !DILocalVariable(name: "b", scope: !13, file: !3, line: 3, type: !5)
+!29 = !{}
+!30 = !DILocalVariable(name: "c", scope: !13, file: !3, line: 3, type: !5)
+!31 = !DILocalVariable(name: "d", scope: !13, file: !3, line: 3, type: !5)
+!32 = !DILocalVariable(name: "e", scope: !13, file: !3, line: 3, type: !5)
+!33 = !DILocalVariable(name: "f", scope: !13, file: !3, line: 3, type: !5)
+!34 = !DILocalVariable(name: "g", scope: !13, file: !3, line: 3, type: !5)
+!35 = !DILocalVariable(name: "h", scope: !13, file: !3, line: 3, type: !5)
+!36 = !DILocalVariable(name: "i", scope: !13, file: !3, line: 3, type: !5)
+!37 = distinct !DIAssignID()
+!38 = !DILabel(scope: !13, name: "label", file: !3, line: 1)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
new file mode 100644
index 0000000..6fced31
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+
+---
+name:            add_unused
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: add_unused
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $w0 = COPY %add(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %add:_(s32), %o:_(s1) = G_SADDO %0, %1
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            add_canon
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: add_canon
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: %add:_(s32), %o:_(s1) = G_SADDO [[COPY]], %const
+    ; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %add(s32)
+    ; CHECK-NEXT: $w1 = COPY %o_wide(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %const:_(s32) = G_CONSTANT i32 10
+    %add:_(s32), %o:_(s1) = G_SADDO %const, %1
+    %o_wide:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %add(s32)
+    $w1 = COPY %o_wide
+    RET_ReallyLR implicit $w0
+...
+---
+name:            add_const_fold
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: add_const_fold
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %add:_(s32) = G_CONSTANT i32 21
+    ; CHECK-NEXT: %o_wide:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: $w0 = COPY %add(s32)
+    ; CHECK-NEXT: $w1 = COPY %o_wide(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %const:_(s32) = G_CONSTANT i32 10
+    %const1:_(s32) = G_CONSTANT i32 11
+    %add:_(s32), %o:_(s1) = G_UADDO %const, %const1
+    %o_wide:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %add(s32)
+    $w1 = COPY %o_wide
+    RET_ReallyLR implicit $w0
+...
+---
+name:            add_add_zero
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: add_add_zero
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w1 = COPY [[C]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = COPY $w2
+    %const:_(s32) = G_CONSTANT i32 10
+    %addl:_(s32) = nsw G_ADD  %2, %const
+    %const1:_(s32) = G_CONSTANT i32 -10
+    %add:_(s32), %o:_(s1) = G_SADDO %addl, %const1
+    %o_wide:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %add(s32)
+    $w1 = COPY %o_wide
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll
new file mode 100644
index 0000000..10882a0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s
+
+define void @vector_deinterleave2_v4i32(<4 x i32> %a) {
+  ; CHECK-LABEL: name: vector_deinterleave2_v4i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(0, 2)
+  ; CHECK-NEXT:   [[SHUF1:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(1, 3)
+  ; CHECK-NEXT:   RET_ReallyLR
+    %res = call {<2 x i32>, <2 x i32>} @llvm.experimental.vector.deinterleave2.v4i32(<4 x i32> %a)
+    ret void
+}
+
+define void @vector_deinterleave2_v8f32(<8 x float> %a) {
+  ; CHECK-LABEL: name: vector_deinterleave2_v8f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0, $q1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(0, 2, 4, 6)
+  ; CHECK-NEXT:   [[SHUF1:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(1, 3, 5, 7)
+  ; CHECK-NEXT:   RET_ReallyLR
+    %res = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %a)
+    ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll
new file mode 100644
index 0000000..f51e47a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s
+
+define void @vector_interleave2_v4i32(<2 x i32> %a, <2 x i32> %b) {
+  ; CHECK-LABEL: name: vector_interleave2_v4i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $d1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
+  ; CHECK-NEXT:   RET_ReallyLR
+    %res = call <4 x i32> @llvm.experimental.vector.interleave2.v4i32(<2 x i32> %a, <2 x i32> %b)
+    ret void
+}
+
+define void @vector_interleave2_v8f32(<4 x float> %a, <4 x float> %b) {
+  ; CHECK-LABEL: name: vector_interleave2_v8f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0, $q1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[BITCAST]](<4 x s32>), [[BITCAST1]], shufflemask(0, 4, 1, 5, 2, 6, 3, 7)
+  ; CHECK-NEXT:   RET_ReallyLR
+    %res = call <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
+    ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-addo-zero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-addo-zero.mir
index 94f56e5..9483cbf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-addo-zero.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-addo-zero.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombiner-only-enable-rule="addo_by_0" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombiner-only-enable-rule="match_addos" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
 # REQUIRES: asserts
 
 # (G_*ADDO x, 0) -> x + no carry
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index 7b8448d..7cdb10e 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -1,23 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 target triple = "aarch64"
 
+; CHECK-GI:      warning: Instruction selection used fallback path for complex_add_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v32f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v16f16_with_intrinsic
+
 ; Expected to not transform
 define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: complex_add_v2f16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h2, v0.h[1]
-; CHECK-NEXT:    mov h3, v1.h[1]
-; CHECK-NEXT:    fsub h1, h1, h2
-; CHECK-NEXT:    fadd h0, h3, h0
-; CHECK-NEXT:    mov v1.h[1], v0.h[0]
-; CHECK-NEXT:    fmov d0, d1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: complex_add_v2f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NEXT:    fsub h1, h1, h2
+; CHECK-SD-NEXT:    fadd h0, h3, h0
+; CHECK-SD-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-NEXT:    fmov d0, d1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: complex_add_v2f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NEXT:    fsub h1, h1, h2
+; CHECK-GI-NEXT:    fadd h0, h3, h0
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    ret
 entry:
   %a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
   %a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
@@ -162,17 +181,29 @@ entry:
 
 ; Expected not to transform as it is integer
 define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; CHECK-LABEL: complex_add_v16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    uzp1 v4.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v5.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    sub v2.8h, v4.8h, v0.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v5.8h
-; CHECK-NEXT:    zip1 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT:    zip2 v1.8h, v2.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: complex_add_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    uzp1 v4.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v5.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    sub v2.8h, v4.8h, v0.8h
+; CHECK-SD-NEXT:    add v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT:    zip1 v0.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT:    zip2 v1.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: complex_add_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v4.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp2 v2.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    sub v1.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    add v2.8h, v2.8h, v4.8h
+; CHECK-GI-NEXT:    zip1 v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    zip2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
new file mode 100644
index 0000000..c5c525a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -0,0 +1,1126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:       warning: Instruction selection used fallback path for extract_v4i32_vector_insert
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_insert_const
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_extract
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const
+
+define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2i64_undef_index:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i64_undef_index:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    str q0, [sp, #-16]!
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    ldr x0, [sp], #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %d = extractelement <2 x i64> %a, i32 undef
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2i64_undef_vector:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i64_undef_vector:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %d = extractelement <2 x i64> undef, i32 %c
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_opaque(<2 x i64> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2i64_opaque:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    ldr x0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i64_opaque:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %d = extractelement <2 x i64> %a, i32 %c
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_oob(<2 x i64> %a, i32 %c) {
+; CHECK-LABEL: extract_v2i64_oob:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %d = extractelement <2 x i64> %a, i32 5
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_freeze(<2 x i64> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2i64_freeze:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    ldr x0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i64_freeze:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %fvector = freeze <2 x i64> %a
+  %d = extractelement <2 x i64> %fvector, i32 %c
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_extract_of_insert(<2 x i64> %a, i64 %element, i64 %c) {
+; CHECK-LABEL: extract_v2i64_extract_of_insert:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %vector = insertelement <2 x i64> %a, i64 %element, i64 %c
+  %d = extractelement <2 x i64> %vector, i64 %c
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_extract_of_insert_different_const(<2 x i64> %a, i64 %element) {
+; CHECK-SD-LABEL: extract_v2i64_extract_of_insert_different_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov x0, v0.d[1]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i64_extract_of_insert_different_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov v0.d[0], x0
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = insertelement <2 x i64> %a, i64 %element, i64 0
+  %d = extractelement <2 x i64> %vector, i64 1
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_extract_build_vector_const(<2 x i64> %a, i32 %c) {
+; CHECK-LABEL: extract_v2i64_extract_build_vector_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, #11 // =0xb
+; CHECK-NEXT:    ret
+entry:
+  %d = extractelement <2 x i64> <i64 42, i64 11>, i32 1
+  ret i64 %d
+}
+
+define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2i64_extract_build_vector_opaque:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr x0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i64_extract_build_vector_opaque:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x1
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr x0, [x9, x8, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %d = extractelement <2 x i64> <i64 42, i64 11>, i32 %c
+  ret i64 %d
+}
+
+
+define i64 @extract_v2i32_zext(<2 x i32> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2i32_zext:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr x0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i32_zext:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %zvector = zext <2 x i32> %a to <2 x i64>
+  %d = extractelement <2 x i64> %zvector, i32 %c
+  ret i64 %d
+}
+
+define i64 @extract_v2double_fptosi(<2 x double> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2double_fptosi:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr x0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2double_fptosi:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = fptosi <2 x double> %a to <2 x i64>
+  %d = extractelement <2 x i64> %vector, i32 %c
+  ret i64 %d
+}
+
+define double @extract_v2double_fneg(<2 x double> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v2double_fneg:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr d0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2double_fneg:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    fneg v0.2d, v0.2d
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr d0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = fneg <2 x double> %a
+  %d = extractelement <2 x double> %vector, i32 %c
+  ret double %d
+}
+
+define i32 @extract_v4i32_add(<4 x i32> %a, <4 x i32> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_add:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_add:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = add <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define float @extract_v4i32_minimum(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_minimum:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr s0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_minimum:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> %b)
+  %d = extractelement <4 x float> %vector, i32 %c
+  ret float %d
+}
+
+define float @extract_v4i32_minimum_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_minimum_build_vector:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr s0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_minimum_build_vector:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
+  %d = extractelement <4 x float> %vector, i32 %c
+  ret float %d
+}
+
+define float @extract_v4i32_minimum_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-LABEL: extract_v4i32_minimum_build_vector_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI15_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT:    fmin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
+  %d = extractelement <4 x float> %vector, i32 1
+  ret float %d
+}
+
+define float @extract_v4i32_copysign_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
+; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr s0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    mvni v1.4s, #128, lsl #24
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = call <4 x float> @llvm.copysign.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
+  %d = extractelement <4 x float> %vector, i32 %c
+  ret float %d
+}
+
+define float @extract_v4i32_copysign_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
+; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT:    mov s0, v0.s[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mvni v1.4s, #128, lsl #24
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    mov s0, v0.s[2]
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = call <4 x float> @llvm.copysign.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
+  %d = extractelement <4 x float> %vector, i32 2
+  ret float %d
+}
+
+
+define i32 @extract_v4i32_icmp(<4 x i32> %a, <4 x i32> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_icmp:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
+; CHECK-SD-NEXT:    movi v2.4s, #1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    cmge v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_icmp:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    adrp x8, .LCPI18_0
+; CHECK-GI-NEXT:    movi v2.4s, #1
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    cmge v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
+  %zvector = zext <4 x i1> %vector to <4 x i32>
+  %d = extractelement <4 x i32> %zvector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_icmp_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_icmp_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    adrp x8, .LCPI19_0
+; CHECK-SD-NEXT:    movi v2.4s, #1
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
+; CHECK-SD-NEXT:    cmge v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov w0, v0.s[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_icmp_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
+; CHECK-GI-NEXT:    movi v2.4s, #1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT:    cmge v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mov s0, v0.s[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
+  %zvector = zext <4 x i1> %vector to <4 x i32>
+  %d = extractelement <4 x i32> %zvector, i32 2
+  ret i32 %d
+}
+
+define i32 @extract_v4float_fcmp(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4float_fcmp:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    movi v1.4s, #1
+; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4float_fcmp:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    fmov v1.4s, #1.00000000
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0>
+  %zvector = zext <4 x i1> %vector to <4 x i32>
+  %d = extractelement <4 x i32> %zvector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4float_fcmp_const(<4 x float> %a, <4 x float> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4float_fcmp_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi v1.4s, #1
+; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    mov w0, v0.s[1]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4float_fcmp_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov v1.4s, #1.00000000
+; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov s0, v0.s[1]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0>
+  %zvector = zext <4 x i1> %vector to <4 x i32>
+  %d = extractelement <4 x i32> %zvector, i32 1
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) {
+; CHECK-SD-LABEL: extract_v4i32_select:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
+; CHECK-SD-NEXT:    adrp x8, .LCPI22_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #31
+; CHECK-SD-NEXT:    cmlt v1.4s, v1.4s, #0
+; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_select:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6>
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) {
+; CHECK-SD-LABEL: extract_v4i32_select_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
+; CHECK-SD-NEXT:    movi v2.4s, #17
+; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #31
+; CHECK-SD-NEXT:    cmlt v1.4s, v1.4s, #0
+; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT:    mov w0, v0.s[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_select_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT:    adrp x8, .LCPI23_0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    mov s0, v0.s[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6>
+  %d = extractelement <4 x i32> %vector, i32 2
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_abs(<4 x float> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_abs:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    frintp v0.4s, v0.4s
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    frintm v0.4s, v0.4s
+; CHECK-SD-NEXT:    fabs v0.4s, v0.4s
+; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_abs:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    and x9, x9, #0x3
+; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
+; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x8, x9, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> %a)
+  %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil)
+  %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor)
+  %abs = fptosi <4 x float> %fabs to <4 x i32>
+  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_abs_const(<4 x float> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_abs_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov w0, #4 // =0x4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_abs_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
+; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
+; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    mov s0, v0.s[1]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>)
+  %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil)
+  %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor)
+  %abs = fptosi <4 x float> %fabs to <4 x i32>
+  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
+  %d = extractelement <4 x i32> %vector, i32 1
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_abs_half_const(<4 x float> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_abs_half_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_abs_half_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
+; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
+; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
+; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>)
+  %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil)
+  %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor)
+  %abs = fptosi <4 x float> %fabs to <4 x i32>
+  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) {
+; CHECK-LABEL: extract_v4i32_vector_insert:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    bfi x8, x0, #2, #2
+; CHECK-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-NEXT:    str q1, [sp]
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
+; CHECK-LABEL: extract_v4i32_vector_insert_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov w0, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
+  %d = extractelement <4 x i32> %vector, i32 1
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) {
+; CHECK-LABEL: extract_v4i32_vector_extract:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    bfi x8, x0, #2, #2
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
+; CHECK-LABEL: extract_v4i32_vector_extract_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
+  %d = extractelement <4 x i32> %vector, i32 0
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_load(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
+; CHECK-SD-LABEL: extract_v4i32_load:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    and x8, x0, #0x3
+; CHECK-SD-NEXT:    ldr w0, [x1, x8, lsl #2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_load:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    ldr w0, [x1, x8, lsl #2]
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = load  <4 x i32>, ptr %arg
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_load_const(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
+; CHECK-LABEL: extract_v4i32_load_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w0, [x1]
+; CHECK-NEXT:    ret
+entry:
+  %vector = load  <4 x i32>, ptr %arg
+  %d = extractelement <4 x i32> %vector, i32 0
+  ret i32 %d
+}
+
+define double @extract_v4i32_bitcast(<4 x i32> %a, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_bitcast:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
+; CHECK-SD-NEXT:    ldr d0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_bitcast:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    mov w9, w0
+; CHECK-GI-NEXT:    mov x8, sp
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    and x9, x9, #0x1
+; CHECK-GI-NEXT:    ldr d0, [x8, x9, lsl #3]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = bitcast <4 x i32> %a to <2 x double>
+  %d = extractelement <2 x double> %vector, i32 %c
+  ret double %d
+}
+
+define double @extract_v4i32_bitcast_const(<4 x i32> %a, i32 %c) {
+; CHECK-LABEL: extract_v4i32_bitcast_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %vector = bitcast <4 x i32> %a to <2 x double>
+  %d = extractelement <2 x double> %vector, i32 0
+  ret double %d
+}
+
+define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_shuffle:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    uzp1 v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    mov v1.s[3], v0.s[3]
+; CHECK-SD-NEXT:    str q1, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_shuffle:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_shuffle_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_shuffle_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov w0, s1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_shuffle_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    mov s0, v0.s[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
+  %d = extractelement <4 x i32> %vector, i32 2
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_splat(<4 x i32> %a, <2 x i32> %b, i32 %c) {
+; CHECK-SD-LABEL: extract_v4i32_splat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    movi v0.4s, #11
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_splat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    movi v0.4s, #11
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %d = extractelement <4 x i32> splat (i32 11), i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_splat_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
+; CHECK-LABEL: extract_v4i32_splat_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, #11 // =0xb
+; CHECK-NEXT:    ret
+entry:
+  %d = extractelement <4 x i32> splat (i32 11), i32 0
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_vp_add(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) {
+; CHECK-SD-LABEL: extract_v4i32_vp_add:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #16
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    ldr w0, [x8]
+; CHECK-SD-NEXT:    add sp, sp, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_vp_add:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #16
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    and x8, x8, #0x3
+; CHECK-GI-NEXT:    str q0, [sp]
+; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
+; CHECK-GI-NEXT:    add sp, sp, #16
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+  %d = extractelement <4 x i32> %vector, i32 %c
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_vp_add_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) {
+; CHECK-SD-LABEL: extract_v4i32_vp_add_const:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mov w0, v0.s[3]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_vp_add_const:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    mov s0, v0.s[3]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+  %d = extractelement <4 x i32> %vector, i32 3
+  ret i32 %d
+}
+
+define i32 @extract_v4i32_phi(i64 %val, i32  %limit, ptr %ptr) {
+; CHECK-SD-LABEL: extract_v4i32_phi:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    dup v1.2s, w0
+; CHECK-SD-NEXT:    adrp x8, .LCPI41_0
+; CHECK-SD-NEXT:    movi v0.2s, #16
+; CHECK-SD-NEXT:    ldr d2, [x8, :lo12:.LCPI41_0]
+; CHECK-SD-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:  .LBB41_1: // %loop
+; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-SD-NEXT:    fmov w8, s1
+; CHECK-SD-NEXT:    add v1.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT:    cmp w8, w1
+; CHECK-SD-NEXT:    add w0, w8, #10
+; CHECK-SD-NEXT:    str w0, [x2, w8, sxtw #2]
+; CHECK-SD-NEXT:    b.lo .LBB41_1
+; CHECK-SD-NEXT:  // %bb.2: // %ret
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4i32_phi:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI41_0
+; CHECK-GI-NEXT:    dup v0.2d, x0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI41_0]
+; CHECK-GI-NEXT:    add v1.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    movi v0.2s, #16
+; CHECK-GI-NEXT:    xtn v1.2s, v1.2d
+; CHECK-GI-NEXT:  .LBB41_1: // %loop
+; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-GI-NEXT:    fmov w8, s1
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    add v1.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT:    cmp w8, w1
+; CHECK-GI-NEXT:    add w0, w9, #10
+; CHECK-GI-NEXT:    str w0, [x2, w8, sxtw #2]
+; CHECK-GI-NEXT:    b.lo .LBB41_1
+; CHECK-GI-NEXT:  // %bb.2: // %ret
+; CHECK-GI-NEXT:    ret
+entry:
+  %tempvector = insertelement <2 x i64> undef, i64 %val, i32 0
+  %vector = shufflevector <2 x i64> %tempvector, <2 x i64> undef, <2 x i32> zeroinitializer
+  %0 = add <2 x i64> %vector, <i64 1, i64 2>
+  %1 = trunc <2 x i64> %0 to <2 x i32>
+  br label %loop
+
+loop:
+  %2 = phi <2 x i32> [ %1, %entry ], [ %inc, %loop ]
+  %elt = extractelement <2 x i32> %2, i32 0
+  %end = icmp ult i32 %elt, %limit
+  %3 = add i32 10, %elt
+  %4 = sext i32 %elt to i64
+  %5 = getelementptr i32, ptr %ptr, i64 %4
+  store i32 %3, ptr %5
+  %inc = add <2 x i32> %2, <i32 16, i32 16>
+  br i1 %end, label %loop, label %ret
+
+ret:
+  ret i32 %3
+}
+
+
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 1b1cfea..2ad5623 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -1,29 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    dup v2.2s, v0.s[1]
-; CHECK-NEXT:    mov v1.16b, v2.16b
-; CHECK-NEXT:    mov v1.h[0], v0.h[1]
-; CHECK-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    dup v2.2s, v0.s[1]
+; CHECK-SD-NEXT:    mov v1.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.h[0], v0.h[1]
+; CHECK-SD-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    uzp2 v1.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    mov h0, v2.h[1]
+; CHECK-GI-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NEXT:    mov v2.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
   %retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
   ret {<2 x half>, <2 x half>}   %retval
 }
 
 define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    uzp1 v2.4h, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v1.4h, v0.4h, v1.4h
-; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    uzp1 v2.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp2 v1.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    fmov d0, d2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    uzp2 v1.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
   %retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
   ret {<4 x half>, <4 x half>}   %retval
 }
@@ -40,13 +61,21 @@ define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %v
 }
 
 define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    zip1 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT:    zip2 v1.2s, v0.2s, v1.2s
-; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    zip1 v2.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    zip2 v1.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    fmov d0, d2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp2 v1.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
   %retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
   ret {<2 x float>, <2 x float>}   %retval
 }
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index 071c1ff..eb81aff 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
 ; CHECK-LABEL: interleave2_v4f16:
@@ -11,15 +12,22 @@ define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
 }
 
 define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
-; CHECK-LABEL: interleave2_v8f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: interleave2_v8f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: interleave2_v8f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    zip1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %retval = call <8 x half> @llvm.experimental.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
   ret <8 x half> %retval
 }
@@ -36,14 +44,21 @@ define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
 }
 
 define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
-; CHECK-LABEL: interleave2_v4f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    rev64 v1.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: interleave2_v4f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    rev64 v1.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: interleave2_v4f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    zip1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
   %retval = call <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)
   ret <4 x float> %retval
 }
diff --git a/llvm/test/CodeGen/AArch64/overflow.ll b/llvm/test/CodeGen/AArch64/overflow.ll
index 444aaeb..1fd60c0 100644
--- a/llvm/test/CodeGen/AArch64/overflow.ll
+++ b/llvm/test/CodeGen/AArch64/overflow.ll
@@ -19,20 +19,12 @@ entry:
 }
 
 define zeroext i1 @saddo1.i32.fold(i32 %v1, i32 %v2, ptr %res) {
-; SDAG-LABEL: saddo1.i32.fold:
-; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #20 // =0x14
-; SDAG-NEXT:    mov w0, wzr
-; SDAG-NEXT:    str w8, [x2]
-; SDAG-NEXT:    ret
-;
-; GISEL-LABEL: saddo1.i32.fold:
-; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    mov w8, #9 // =0x9
-; GISEL-NEXT:    adds w8, w8, #11
-; GISEL-NEXT:    cset w0, vs
-; GISEL-NEXT:    str w8, [x2]
-; GISEL-NEXT:    ret
+; CHECK-LABEL: saddo1.i32.fold:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #20 // =0x14
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 9, i32 11)
   %val = extractvalue {i32, i1} %t, 0
@@ -123,18 +115,11 @@ entry:
 }
 
 define zeroext i1 @saddo.canon.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) {
-; SDAG-LABEL: saddo.canon.i32:
-; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w0, wzr
-; SDAG-NEXT:    str w4, [x5]
-; SDAG-NEXT:    ret
-;
-; GISEL-LABEL: saddo.canon.i32:
-; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    adds w8, wzr, w4
-; GISEL-NEXT:    cset w0, vs
-; GISEL-NEXT:    str w8, [x5]
-; GISEL-NEXT:    ret
+; CHECK-LABEL: saddo.canon.i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    str w4, [x5]
+; CHECK-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 0, i32 %v5)
   %val = extractvalue {i32, i1} %t, 0
@@ -143,13 +128,19 @@ entry:
   ret i1 %obit
 }
 define zeroext i1 @saddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) {
-; CHECK-LABEL: saddo.add.i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    add w8, w4, #100
-; CHECK-NEXT:    subs w8, w8, #100
-; CHECK-NEXT:    cset w0, vs
-; CHECK-NEXT:    str w8, [x5]
-; CHECK-NEXT:    ret
+; SDAG-LABEL: saddo.add.i32:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    add w8, w4, #100
+; SDAG-NEXT:    subs w8, w8, #100
+; SDAG-NEXT:    cset w0, vs
+; SDAG-NEXT:    str w8, [x5]
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: saddo.add.i32:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    mov w0, wzr
+; GISEL-NEXT:    str w4, [x5]
+; GISEL-NEXT:    ret
 entry:
   %lhs = add nsw i32 %v5, 100
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %lhs, i32 -100)
@@ -160,13 +151,20 @@ entry:
 }
 
 define zeroext i1 @uaddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) {
-; CHECK-LABEL: uaddo.add.i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    add w8, w4, #5
-; CHECK-NEXT:    adds w8, w8, #5
-; CHECK-NEXT:    cset w0, hs
-; CHECK-NEXT:    str w8, [x5]
-; CHECK-NEXT:    ret
+; SDAG-LABEL: uaddo.add.i32:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    add w8, w4, #5
+; SDAG-NEXT:    adds w8, w8, #5
+; SDAG-NEXT:    cset w0, hs
+; SDAG-NEXT:    str w8, [x5]
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: uaddo.add.i32:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    adds w8, w4, #10
+; GISEL-NEXT:    cset w0, hs
+; GISEL-NEXT:    str w8, [x5]
+; GISEL-NEXT:    ret
 entry:
   %lhs = add nuw i32 %v5, 5
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %lhs, i32 5)
diff --git a/llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir b/llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir
new file mode 100644
index 0000000..e6cce9a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=aarch64--linux-gnu -run-pass=early-machinelicm %s -verify-machineinstrs -o - | FileCheck %s
+---
+name:            test_should_hoist_pfalse
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_should_hoist_pfalse
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr64all = COPY [[COPY]]
+  ; CHECK-NEXT:   [[PFALSE:%[0-9]+]]:ppr = PFALSE implicit $vg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gpr64common = PHI [[COPY2]], %bb.0, %5, %bb.1
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gpr64sp = PHI [[COPY3]], %bb.0, %7, %bb.1
+  ; CHECK-NEXT:   STR_PXI [[PFALSE]], [[PHI]], 0
+  ; CHECK-NEXT:   [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[PHI1]], 1, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr64all = COPY [[SUBSXri]]
+  ; CHECK-NEXT:   [[INCD_XPiI:%[0-9]+]]:gpr64 = INCD_XPiI [[PHI]], 31, 1
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gpr64all = COPY [[INCD_XPiI]]
+  ; CHECK-NEXT:   Bcc 1, %bb.1, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0:
+    successors: %bb.1
+    liveins: $x0, $x1
+
+    %5:gpr64 = COPY $x1
+    %4:gpr64 = COPY $x0
+    MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+    %6:gpr64all = COPY %4
+    %7:gpr64all = COPY %5
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:gpr64common = PHI %6, %bb.0, %3, %bb.1
+    %1:gpr64sp = PHI %7, %bb.0, %2, %bb.1
+    %8:ppr = PFALSE implicit $vg
+    STR_PXI killed %8, %0, 0
+    %9:gpr64 = SUBSXri %1, 1, 0, implicit-def $nzcv
+    %2:gpr64all = COPY %9
+    %10:gpr64 = INCD_XPiI %0, 31, 1
+    %3:gpr64all = COPY %10
+
+
+    Bcc 1, %bb.1, implicit $nzcv
+    B %bb.2
+
+  bb.2:
+    MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+    RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll b/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
index d8969fc..22d177c 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
@@ -20,10 +20,10 @@ entry:
 ; CHECK-LABEL: define void @OneVarNoInit(
 ; CHECK-DAG:  [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
 ; CHECK-DAG:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], {{.*}}, i64 0)
-; CHECK-DAG:  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TX]])
+; CHECK-DAG:  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[X]])
 ; CHECK-DAG:  call void @llvm.aarch64.settag(ptr [[TX]], i64 16)
 ; CHECK-DAG:  call void @use(ptr nonnull [[TX]])
-; CHECK-DAG:  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TX]])
+; CHECK-DAG:  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[X]])
 
 define void @OneVarInitConst() sanitize_memtag {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-stack-coloring.ll b/llvm/test/CodeGen/AArch64/stack-tagging-stack-coloring.ll
index 6eb7201..8134962 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-stack-coloring.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-stack-coloring.ll
@@ -1,20 +1,20 @@
 ; Test that storage for allocas with disjoint lifetimes is reused with stack
 ; tagging.
 
-; RUN: opt -S -aarch64-stack-tagging %s -o - | \
-; RUN:   llc -no-stack-coloring=false -o - | \
+; RUN: opt -S -aarch64-stack-tagging -stack-tagging-use-stack-safety=0 %s -o - | \
+; RUN:   llc --mattr=+mte -no-stack-coloring=false -o - | \
 ; RUN:   FileCheck %s --check-prefix=COLOR
-; RUN: opt -S -aarch64-stack-tagging %s -o - | \
-; RUN:   llc -no-stack-coloring=true -o - | \
+; RUN: opt -S -aarch64-stack-tagging %s -stack-tagging-use-stack-safety=0 -o - | \
+; RUN:   llc --mattr=+mte -no-stack-coloring=true -o - | \
 ; RUN:   FileCheck %s --check-prefix=NOCOLOR
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-unknown-linux-android29"
+target triple = "aarch64"
 
-; COLOR: sub	sp, sp, #192
-; NOCOLOR: sub	sp, sp, #320
+; COLOR: sub	sp, sp, #208
+; NOCOLOR: sub	sp, sp, #336
 
-define i32 @myCall_w2(i32 %in) sanitize_hwaddress {
+define i32 @myCall_w2(i32 %in) sanitize_memtag {
 entry:
   %a = alloca [17 x ptr], align 8
   %a2 = alloca [16 x ptr], align 8
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll b/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll
index 06f8cd5..aa9cccc 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll
@@ -27,7 +27,7 @@ S1:
 ; CHECK: call void @llvm.aarch64.settag(ptr %w, i64 48)
 ; CHECK-NOT: settag{{.*}}%v
   call void @llvm.lifetime.end.p0(i64 48, ptr nonnull %w) #1
-; CHECK: call void @llvm.lifetime.end.p0(i64 48, ptr nonnull %w.tag)
+; CHECK: call void @llvm.lifetime.end.p0(i64 48, ptr nonnull %w)
   %b1 = icmp eq i32 %t1, 0
   br i1 %b1, label %S2, label %S3
 ; CHECK-NOT: settag
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index d36f5c0..a6f9bb7e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -4142,11 +4142,11 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 16, v[2:3]
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4162,7 +4162,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, v[4:5], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s5, 0x80000000, v6
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, vcc_lo, s4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
@@ -4179,7 +4179,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v6
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, vcc_lo, s0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
 ; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4202,7 +4202,7 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    s_ashr_i32 s2, s7, 31
 ; GFX6-NEXT:    s_ashr_i32 s5, s7, 15
-; GFX6-NEXT:    s_add_u32 s2, s2, 0xffff8000
+; GFX6-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -4227,7 +4227,7 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX8-NEXT:    s_ashr_i32 s2, s7, 31
 ; GFX8-NEXT:    s_ashr_i32 s5, s7, 15
-; GFX8-NEXT:    s_add_u32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
@@ -4250,7 +4250,7 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX9-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
@@ -4274,7 +4274,7 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX10-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX10-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4293,7 +4293,7 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX11-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX11-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX11-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4351,11 +4351,11 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4371,7 +4371,7 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4388,7 +4388,7 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4442,15 +4442,15 @@ define amdgpu_ps <2 x float> @saddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX9-LABEL: saddsat_i48_vs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
-; GFX9-NEXT:    s_lshl_b64 s[2:3], s[0:1], 16
-; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v0
+; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[2:3], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], 0
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4466,7 +4466,7 @@ define amdgpu_ps <2 x float> @saddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, s[0:1], 0
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4483,7 +4483,7 @@ define amdgpu_ps <2 x float> @saddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[0:1], 0
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4529,11 +4529,11 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -4546,7 +4546,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s4, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s5, 0x80000000, v6
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
@@ -4560,7 +4560,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v6
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -4578,7 +4578,7 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX6-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -4599,7 +4599,7 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX8-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX8-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX8-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
@@ -4620,7 +4620,7 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX9-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
@@ -4641,7 +4641,7 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX10-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX10-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4657,7 +4657,7 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX11-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX11-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX11-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4702,11 +4702,11 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4718,7 +4718,7 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4731,7 +4731,7 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -4774,11 +4774,11 @@ define amdgpu_ps <2 x float> @saddsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4790,7 +4790,7 @@ define amdgpu_ps <2 x float> @saddsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, s[0:1], 0
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4803,7 +4803,7 @@ define amdgpu_ps <2 x float> @saddsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[0:1], 0
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -4866,21 +4866,20 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v0, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v1, v5, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[4:5]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v0, v1
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v2, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v3, v7, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[6:7]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 0x80000000, v2
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -4896,10 +4895,10 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1]
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s4, 0, v[4:5]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s6, 0, v[6:7]
-; GFX10-NEXT:    v_add_co_u32 v1, s5, 0x80000000, v12
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[10:11], v[2:3]
-; GFX10-NEXT:    v_add_co_u32 v3, s7, 0x80000000, v4
+; GFX10-NEXT:    v_cmp_gt_i64_e64 s6, 0, v[6:7]
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v8, v12, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc_lo
@@ -4921,8 +4920,8 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, v[10:11], v[2:3]
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s2, 0, v[6:7]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v12
-; GFX11-NEXT:    v_add_co_u32 v3, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v8, v12 :: v_dual_cndmask_b32 v1, v9, v1
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s2, s1
@@ -4942,7 +4941,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[4:5], 0
 ; GFX6-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX6-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s8
@@ -4957,7 +4956,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[6:7], 0
 ; GFX6-NEXT:    s_ashr_i32 s4, s1, 31
-; GFX6-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v4, s0
@@ -4980,7 +4979,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[4:5], 0
 ; GFX8-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX8-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s8
@@ -4995,7 +4994,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[6:7], 0
 ; GFX8-NEXT:    s_ashr_i32 s4, s1, 31
-; GFX8-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s0
@@ -5018,7 +5017,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[4:5], 0
 ; GFX9-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX9-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX9-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s8
@@ -5033,7 +5032,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[6:7], 0
 ; GFX9-NEXT:    s_ashr_i32 s4, s1, 31
-; GFX9-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX9-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s0
@@ -5056,7 +5055,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s1, s[4:5], 0
 ; GFX10-NEXT:    s_ashr_i32 s4, s9, 31
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s9
-; GFX10-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX10-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s8, s1, s0
 ; GFX10-NEXT:    s_add_u32 s0, s2, s6
 ; GFX10-NEXT:    s_addc_u32 s1, s3, s7
@@ -5067,7 +5066,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s8
 ; GFX10-NEXT:    s_ashr_i32 s4, s1, 31
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s8
-; GFX10-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s1, s3, s2
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s4, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s0, s1
@@ -5085,7 +5084,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[8:9], s[0:1]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, s[4:5], 0
 ; GFX11-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX11-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX11-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s8, s1, s0
 ; GFX11-NEXT:    s_add_u32 s0, s2, s6
 ; GFX11-NEXT:    s_addc_u32 s1, s3, s7
@@ -5095,7 +5094,7 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s8
 ; GFX11-NEXT:    s_ashr_i32 s4, s1, 31
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s8
-; GFX11-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s1, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s4, s1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, s0, s1
@@ -5132,7 +5131,7 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s5
@@ -5179,7 +5178,7 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s5
@@ -5226,7 +5225,7 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s5
@@ -5269,7 +5268,7 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX10-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX10-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX10-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5310,7 +5309,7 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX11-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX11-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX11-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5412,9 +5411,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v7, 0, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GFX9-NEXT:    v_bfrev_b32_e32 v6, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v3, v6
 ; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX9-NEXT:    v_add_u32_e32 v6, 0x80000000, v3
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -5440,7 +5438,7 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc_lo
 ; GFX10-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GFX10-NEXT:    v_add_co_u32 v6, s0, 0x80000000, v3
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0x80000000, v3
 ; GFX10-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
@@ -5467,7 +5465,7 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc_lo
 ; GFX11-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GFX11-NEXT:    v_add_co_u32 v6, null, 0x80000000, v3
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0x80000000, v3
 ; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
@@ -5569,9 +5567,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -5597,9 +5594,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v8, 0, s0
-; GFX10-NEXT:    v_add_co_u32 v3, s0, 0x80000000, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5627,15 +5624,14 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX11-NEXT:    v_add_co_u32 v3, null, 0x80000000, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v8, 0, s0
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, v6, v2 :: v_dual_cndmask_b32 v3, v7, v3
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
 ; GFX11-NEXT:    ; return to shader part epilog
   %result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs)
   %cast = bitcast i128 %result to <4 x float>
@@ -5762,12 +5758,11 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v2, vcc
@@ -5786,11 +5781,11 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
+; GFX9-NEXT:    v_add_u32_e32 v7, 0x80000000, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 0x80000000, v6
 ; GFX9-NEXT:    v_and_b32_e32 v4, 1, v4
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
@@ -5832,18 +5827,18 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v19
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v17
-; GFX10-NEXT:    v_add_co_u32 v7, s5, 0x80000000, v6
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0x80000000, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v4, s4, 0x80000000, v3
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v8, v3, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, v16, v3, vcc_lo
-; GFX10-NEXT:    v_and_b32_e32 v5, 1, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc_lo
+; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
+; GFX10-NEXT:    v_and_b32_e32 v3, 1, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v4, 0x80000000, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v8, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v17, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v12, v6, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v13, v6, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v18, v6, s4
@@ -5882,18 +5877,17 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v19
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v17
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_add_co_u32 v7, null, 0x80000000, v6
+; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0x80000000, v6
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_add_co_u32 v4, null, 0x80000000, v3
 ; GFX11-NEXT:    v_xor_b32_e32 v1, v2, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v8, v3, vcc_lo
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, v16, v3 :: v_dual_and_b32 v5, 1, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v17, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, v5
+; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v4, 0x80000000, v2
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v8, v2 :: v_dual_and_b32 v3, 1, v1
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v2, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v2, v16, v2 :: v_dual_cndmask_b32 v3, v17, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, v12, v6, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, v13, v6, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, v18, v6, s0
@@ -5927,7 +5921,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s9
@@ -5960,7 +5954,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s1
@@ -6011,7 +6005,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s9
@@ -6050,7 +6044,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
@@ -6101,7 +6095,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s9
@@ -6140,7 +6134,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX9-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
@@ -6184,7 +6178,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT:    s_and_b32 s1, 1, s1
 ; GFX10-NEXT:    s_ashr_i32 s10, s17, 31
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT:    s_add_u32 s11, s10, 0x80000000
+; GFX10-NEXT:    s_add_i32 s11, s10, 0x80000000
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX10-NEXT:    s_add_u32 s0, s4, s12
@@ -6221,7 +6215,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s17
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s10, vcc_lo
-; GFX10-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
 ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
@@ -6261,7 +6255,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    s_and_b32 s1, 1, s1
 ; GFX11-NEXT:    s_ashr_i32 s10, s17, 31
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT:    s_add_u32 s11, s10, 0x80000000
+; GFX11-NEXT:    s_add_i32 s11, s10, 0x80000000
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX11-NEXT:    s_add_u32 s0, s4, s12
@@ -6299,7 +6293,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
-; GFX11-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 0a6b7af..84906c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -3091,253 +3091,252 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0, v1
-; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_add_i32_e64 v3, s[4:5], 0, 0
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v3
-; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
+; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v4
-; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; GISEL-NEXT:    v_mul_hi_u32 v4, v12, v4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v7
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v4
-; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v12, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 0, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; GISEL-NEXT:    v_mul_hi_u32 v0, v9, v4
-; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v9, v7
-; GISEL-NEXT:    v_mul_hi_u32 v4, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v7, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 0, v0
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
+; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
+; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v12, v4, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v11, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v10, v4
-; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v11, v0
-; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v4
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v13, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v3, v9, 0
+; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v3, v0, v[5:6]
-; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v1, v9, v[7:8]
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v10, v4
-; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], v11, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[5:6]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9]
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v11, v7
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], v3, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], 0, v2
-; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v2
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v1
-; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v1, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0x4f800000, v13
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v6, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v7
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v2
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v3
+; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v9, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v6, v1
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v4
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, 0, v2
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v5
-; GISEL-NEXT:    v_sub_i32_e32 v15, vcc, 0, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v13, 0
-; GISEL-NEXT:    v_subb_u32_e32 v16, vcc, 0, v2, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v15, v14, v[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, 1, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v16, v13, v[6:7]
-; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v3
-; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v5
-; GISEL-NEXT:    v_mul_lo_u32 v10, v13, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v1
-; GISEL-NEXT:    v_mul_hi_u32 v1, v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v19, v3, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v11, 0
+; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v13, v[5:6]
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v11, v[5:6]
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v0, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v6, v13, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v18, v1, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v6
-; GISEL-NEXT:    v_mul_hi_u32 v5, v14, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; GISEL-NEXT:    v_mul_hi_u32 v10, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v6
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v1
-; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v14, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v10, 0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v17
-; GISEL-NEXT:    v_mov_b32_e32 v1, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v15, v11, v[1:2]
-; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v18, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v16, v10, v[6:7]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v17, v13, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v18, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_mul_lo_u32 v7, v11, v5
-; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v13, v10, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], 0, v12
-; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v13
+; GISEL-NEXT:    v_mul_lo_u32 v6, v13, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v5, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v1
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v13, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v8, 0
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v16
+; GISEL-NEXT:    v_mov_b32_e32 v1, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v11, v[1:2]
+; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v8, v[5:6]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v16, v13, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v17, v18, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], 0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v4
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v6
-; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
-; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v6
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v6
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v4
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v5
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v12, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v10, v5
-; GISEL-NEXT:    v_addc_u32_e64 v6, s[4:5], v11, v6, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v5
-; GISEL-NEXT:    v_mul_lo_u32 v8, v9, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v12, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], v11, v5, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v0, v9, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v5, v0
-; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v8, 0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v5
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v4, v10, v[0:1]
+; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v9, v[0:1]
 ; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v2, v8, v[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v9, v5
-; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v12, v6, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v12, v6
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v2
-; GISEL-NEXT:    v_subb_u32_e32 v6, vcc, v6, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v4
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v2
-; GISEL-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v7, v9, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v8
-; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v10, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v8, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v7
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v3, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v7
-; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v8
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v10, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v4, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index c455b24..83ebc84 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -3034,253 +3034,251 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
+; GISEL-NEXT:    v_add_i32_e64 v3, s[4:5], 0, 0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
-; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v1
-; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
+; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v4
-; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; GISEL-NEXT:    v_mul_hi_u32 v4, v12, v4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v7
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v4
-; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v12, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 0, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; GISEL-NEXT:    v_mul_hi_u32 v0, v9, v4
-; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v9, v7
-; GISEL-NEXT:    v_mul_hi_u32 v4, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v7, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 0, v0
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
+; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
+; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v8, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v7
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v12, v4, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v11, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v10, v4
-; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v11, v0
-; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v4
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v13, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v9, 0
+; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v7, v[0:1]
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v5, v[0:1]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[7:8]
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v10, v4
-; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v11, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v11, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9]
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v0
-; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v4
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
-; GISEL-NEXT:    v_subb_u32_e32 v13, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v6, v9, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v5, v3, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v4
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v6, v7, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v8, v1
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v7, v1
+; GISEL-NEXT:    v_subbrev_u32_e64 v13, s[4:5], 0, v10, vcc
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v0
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v0
-; GISEL-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v13, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v16, s[4:5], 0, v2
-; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, v4, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v16, v15, 0
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v18, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v3
-; GISEL-NEXT:    v_mov_b32_e32 v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v16, v18, v[0:1]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v1
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v0
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v4
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v0
+; GISEL-NEXT:    v_sub_i32_e64 v15, s[4:5], 0, v2
+; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, v3, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v3
+; GISEL-NEXT:    v_mov_b32_e32 v0, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v17, v[0:1]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v17, v15, v[6:7]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v14, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, v19, v0, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v0, v18, v5
-; GISEL-NEXT:    v_mul_lo_u32 v19, v15, v6
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v13, v3, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v13, v15, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v16, v14, v[5:6]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v13, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v18, v0, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v0, v17, v4
+; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v5
+; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v4
+; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v10, v3, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v18, v6
-; GISEL-NEXT:    v_mul_hi_u32 v5, v18, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v19, v0
-; GISEL-NEXT:    v_mul_hi_u32 v19, v15, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v17, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v18, v0
+; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v19, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v6, v18, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v0
-; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v18, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v16, v13, 0
-; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v10, v1
-; GISEL-NEXT:    v_mov_b32_e32 v0, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v15, v[0:1]
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v17, v13, v[0:1]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v10, v18, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v14, v3, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v8, v6, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v15, v5
-; GISEL-NEXT:    v_mul_lo_u32 v7, v13, v0
-; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v5
-; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], 0, v12
-; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v11, v15, v0
-; GISEL-NEXT:    v_mul_hi_u32 v5, v15, v5
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v0
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; GISEL-NEXT:    v_mul_hi_u32 v5, v17, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v18, v4
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v0
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v17, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0
+; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v11, v1
+; GISEL-NEXT:    v_mov_b32_e32 v0, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v15, v17, v[0:1]
+; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v14, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v18, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v13, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v17, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v0
+; GISEL-NEXT:    v_mul_hi_u32 v10, v14, v4
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], 0, v12
 ; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
-; GISEL-NEXT:    v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
-; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v0, v6
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v13, v5
-; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v15, v0, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v6, v10, v5
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v17, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v14, v0
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v10, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v7, v5
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v0, v5
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v14, v4
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v17, v0, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v9, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v9, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v9, v0
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v0
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v5, v6
-; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v9, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v0, v7
-; GISEL-NEXT:    v_mov_b32_e32 v0, v6
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v2, v7, v[0:1]
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v4, v5
+; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v0, v6
+; GISEL-NEXT:    v_mov_b32_e32 v0, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[0:1]
 ; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v4, v9, v[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v8, v5
-; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v10, v6, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v10, v6
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v4
-; GISEL-NEXT:    v_subb_u32_e32 v6, vcc, v6, v4, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
+; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v3, v2
-; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v6, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v4, v2
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v4
-; GISEL-NEXT:    v_subb_u32_e32 v4, vcc, v6, v4, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v5, v3, vcc
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 61e1e67..320dfbb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -4142,11 +4142,11 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 16, v[2:3]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4162,7 +4162,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, v[4:5], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s5, 0x80000000, v6
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, vcc_lo, s4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
@@ -4179,7 +4179,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v6
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, vcc_lo, s0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
 ; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4202,7 +4202,7 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    s_ashr_i32 s2, s7, 31
 ; GFX6-NEXT:    s_ashr_i32 s5, s7, 15
-; GFX6-NEXT:    s_add_u32 s2, s2, 0xffff8000
+; GFX6-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -4227,7 +4227,7 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX8-NEXT:    s_ashr_i32 s2, s7, 31
 ; GFX8-NEXT:    s_ashr_i32 s5, s7, 15
-; GFX8-NEXT:    s_add_u32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
@@ -4250,7 +4250,7 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX9-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
@@ -4274,7 +4274,7 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX10-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX10-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4293,7 +4293,7 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX11-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX11-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX11-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4351,11 +4351,11 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4371,7 +4371,7 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4388,7 +4388,7 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4442,15 +4442,15 @@ define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX9-LABEL: ssubsat_i48_vs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
-; GFX9-NEXT:    s_lshl_b64 s[2:3], s[0:1], 16
-; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s2, v0
+; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[2:3], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], 0
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4466,7 +4466,7 @@ define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s0, s[0:1], 0
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4483,7 +4483,7 @@ define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, s[0:1], 0
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 16, v[0:1]
@@ -4529,11 +4529,11 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -4546,7 +4546,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s5, 0x80000000, v6
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
@@ -4560,7 +4560,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v6
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -4578,7 +4578,7 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX6-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -4599,7 +4599,7 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX8-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX8-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX8-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
@@ -4620,7 +4620,7 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX9-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
@@ -4641,7 +4641,7 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX10-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX10-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4657,7 +4657,7 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX11-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX11-NEXT:    s_add_u32 s3, s2, 0x80000000
+; GFX11-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s2, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s3, s0
@@ -4702,11 +4702,11 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4718,7 +4718,7 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4731,7 +4731,7 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -4774,11 +4774,11 @@ define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
-; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4790,7 +4790,7 @@ define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s0, s[0:1], 0
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_add_co_u32 v1, s1, 0x80000000, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4803,7 +4803,7 @@ define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, s[0:1], 0
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
 ; GFX11-NEXT:    ; return to shader part epilog
@@ -4866,21 +4866,20 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, v0, v4
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, v1, v5, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[4:5]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v0, v1
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v2, v6
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v7, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[6:7]
+; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
+; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 0x80000000, v2
-; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
+; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -4896,10 +4895,10 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1]
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[4:5]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s6, 0, v[6:7]
-; GFX10-NEXT:    v_add_co_u32 v1, s5, 0x80000000, v12
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[10:11], v[2:3]
-; GFX10-NEXT:    v_add_co_u32 v3, s7, 0x80000000, v4
+; GFX10-NEXT:    v_cmp_lt_i64_e64 s6, 0, v[6:7]
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v8, v12, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc_lo
@@ -4921,8 +4920,8 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, v[10:11], v[2:3]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s2, 0, v[6:7]
-; GFX11-NEXT:    v_add_co_u32 v1, null, 0x80000000, v12
-; GFX11-NEXT:    v_add_co_u32 v3, null, 0x80000000, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v8, v12 :: v_dual_cndmask_b32 v1, v9, v1
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s2, s1
@@ -4942,7 +4941,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[4:5], 0
 ; GFX6-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX6-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s8
@@ -4957,7 +4956,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[6:7], 0
 ; GFX6-NEXT:    s_ashr_i32 s4, s1, 31
-; GFX6-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v4, s0
@@ -4980,7 +4979,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[4:5], 0
 ; GFX8-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX8-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s8
@@ -4995,7 +4994,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[6:7], 0
 ; GFX8-NEXT:    s_ashr_i32 s4, s1, 31
-; GFX8-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s0
@@ -5018,7 +5017,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[4:5], 0
 ; GFX9-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX9-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX9-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s8
@@ -5033,7 +5032,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[6:7], 0
 ; GFX9-NEXT:    s_ashr_i32 s4, s1, 31
-; GFX9-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX9-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s0
@@ -5056,7 +5055,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s1, s[4:5], 0
 ; GFX10-NEXT:    s_ashr_i32 s4, s9, 31
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s9
-; GFX10-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX10-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s8, s1, s0
 ; GFX10-NEXT:    s_sub_u32 s0, s2, s6
 ; GFX10-NEXT:    s_subb_u32 s1, s3, s7
@@ -5067,7 +5066,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s8
 ; GFX10-NEXT:    s_ashr_i32 s4, s1, 31
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s8
-; GFX10-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX10-NEXT:    s_xor_b32 s1, s3, s2
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s4, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s0, s1
@@ -5085,7 +5084,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, s[8:9], s[0:1]
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s1, s[4:5], 0
 ; GFX11-NEXT:    s_ashr_i32 s4, s9, 31
-; GFX11-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX11-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s8, s1, s0
 ; GFX11-NEXT:    s_sub_u32 s0, s2, s6
 ; GFX11-NEXT:    s_subb_u32 s1, s3, s7
@@ -5095,7 +5094,7 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s8
 ; GFX11-NEXT:    s_ashr_i32 s4, s1, 31
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s8
-; GFX11-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT:    s_xor_b32 s1, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s4, s1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, s0, s1
@@ -5134,7 +5133,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    s_ashr_i32 s0, s11, 31
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s9
@@ -5183,7 +5182,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s11, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s9
@@ -5232,7 +5231,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s11, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s9
@@ -5274,7 +5273,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
-; GFX10-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX10-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s9
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s11
@@ -5317,7 +5316,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
-; GFX11-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX11-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX11-NEXT:    v_dual_cndmask_b32 v1, v3, v2 :: v_dual_mov_b32 v2, s9
 ; GFX11-NEXT:    v_mov_b32_e32 v3, s11
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
@@ -5427,9 +5426,8 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v8
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -5456,7 +5454,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v3, s0, 0x80000000, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v8
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5484,8 +5482,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_add_co_u32 v3, null, 0x80000000, v2
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v8
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5594,9 +5591,8 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -5625,7 +5621,7 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX10-NEXT:    v_add_co_u32 v3, s0, 0x80000000, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5652,12 +5648,12 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, s[2:3], 0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
-; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s0
 ; GFX11-NEXT:    s_and_b32 s0, 1, s4
-; GFX11-NEXT:    v_add_co_u32 v3, null, 0x80000000, v2
+; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5805,9 +5801,8 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v19
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v17, v2, vcc
@@ -5831,8 +5826,8 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v4
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 0x80000000, v6
 ; GFX9-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX9-NEXT:    v_add_u32_e32 v7, 0x80000000, v6
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v6, vcc
@@ -5877,18 +5872,18 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v21
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v19
-; GFX10-NEXT:    v_add_co_u32 v7, s5, 0x80000000, v6
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0x80000000, v6
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v4, s4, 0x80000000, v3
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v16, v3, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, v18, v3, vcc_lo
-; GFX10-NEXT:    v_and_b32_e32 v5, 1, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, v17, v3, vcc_lo
+; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v19
+; GFX10-NEXT:    v_and_b32_e32 v3, 1, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v4, 0x80000000, v2
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v16, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v17, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v18, v2, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v19, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v6, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, v6, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v20, v6, s4
@@ -5931,18 +5926,16 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v21
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v19
+; GFX11-NEXT:    v_dual_cndmask_b32 v2, v5, v4 :: v_dual_add_nc_u32 v7, 0x80000000, v6
+; GFX11-NEXT:    v_xor_b32_e32 v1, v2, v1
+; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v19
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_add_co_u32 v7, null, 0x80000000, v6
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc_lo
+; GFX11-NEXT:    v_add_nc_u32_e32 v4, 0x80000000, v2
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_add_co_u32 v4, null, 0x80000000, v3
-; GFX11-NEXT:    v_xor_b32_e32 v1, v2, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v16, v3, vcc_lo
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, v18, v3 :: v_dual_and_b32 v5, 1, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v17, v3, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v19, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, v5
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v16, v2 :: v_dual_and_b32 v3, 1, v1
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v17, v2, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v2, v18, v2 :: v_dual_cndmask_b32 v3, v19, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v6, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, v6, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, v20, v6, s0
@@ -5978,7 +5971,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    s_ashr_i32 s0, s19, 31
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s16
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s17
@@ -6013,7 +6006,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s1
@@ -6066,7 +6059,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s19, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s16
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s17
@@ -6107,7 +6100,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
@@ -6160,7 +6153,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s19, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_add_u32 s1, s0, 0x80000000
+; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s16
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s17
@@ -6201,7 +6194,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_add_u32 s5, s4, 0x80000000
+; GFX9-NEXT:    s_add_i32 s5, s4, 0x80000000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
@@ -6244,7 +6237,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT:    s_ashr_i32 s8, s17, 31
 ; GFX10-NEXT:    s_and_b32 s1, 1, s1
-; GFX10-NEXT:    s_add_u32 s9, s8, 0x80000000
+; GFX10-NEXT:    s_add_i32 s9, s8, 0x80000000
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
@@ -6273,7 +6266,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX10-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX10-NEXT:    s_and_b32 s5, 1, s5
-; GFX10-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s6
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
@@ -6326,7 +6319,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT:    s_ashr_i32 s8, s19, 31
 ; GFX11-NEXT:    s_and_b32 s1, 1, s1
-; GFX11-NEXT:    s_add_u32 s9, s8, 0x80000000
+; GFX11-NEXT:    s_add_i32 s9, s8, 0x80000000
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
@@ -6357,7 +6350,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s6
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
-; GFX11-NEXT:    s_add_u32 s0, s4, 0x80000000
+; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT:    v_dual_cndmask_b32 v2, v4, v3 :: v_dual_mov_b32 v3, s16
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s18
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 887c43f..d155513 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -2062,13 +2062,9 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
@@ -2077,10 +2073,6 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 5c6bb6d..07480a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -2480,13 +2480,9 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
@@ -2495,10 +2491,6 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
new file mode 100644
index 0000000..b2311a8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -0,0 +1,1510 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GISEL %s
+
+define i128 @fptosi_f64_to_i128(double %x) {
+; SDAG-LABEL: fptosi_f64_to_i128:
+; SDAG:       ; %bb.0: ; %fp-to-i-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v5, v1
+; SDAG-NEXT:    v_bfe_u32 v6, v5, 20, 11
+; SDAG-NEXT:    v_mov_b32_e32 v7, 0
+; SDAG-NEXT:    s_mov_b64 s[4:5], 0x3fe
+; SDAG-NEXT:    v_mov_b32_e32 v4, v0
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB0_10
+; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v7, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0xffffff7f
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_7
+; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT:    v_addc_co_u32_e64 v10, s[6:7], 0, -1, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0x432
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, -1, 1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_4
+; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0x473, v6
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xfffffb8d, v6
+; SDAG-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v1, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v1, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v12, v11, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG-NEXT:    v_mul_lo_u32 v13, v8, v2
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v11, v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, v1
+; SDAG-NEXT:    v_mul_lo_u32 v6, v11, v6
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v11, v[2:3]
+; SDAG-NEXT:    v_mul_lo_u32 v10, v10, v12
+; SDAG-NEXT:    v_add3_u32 v5, v5, v6, v13
+; SDAG-NEXT:    v_mov_b32_e32 v6, v2
+; SDAG-NEXT:    v_mov_b32_e32 v2, v3
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v12, v8, v[1:2]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v9, v12, v[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v5, vcc, v6, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v6, s[6:7], 0, 0, vcc
+; SDAG-NEXT:    v_mul_lo_u32 v9, v9, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v7, v8, v[5:6]
+; SDAG-NEXT:    ; implicit-def: $vgpr11
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    v_add3_u32 v4, v10, v4, v9
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v5, v3
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v6, v4, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:    ; implicit-def: $vgpr10
+; SDAG-NEXT:  .LBB0_4: ; %Flow
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[12:13], s[12:13]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_6
+; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x433, v6
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, v0, v4, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, v1, v5, s[6:7]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v6, v11, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v5, v11, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v7, v4
+; SDAG-NEXT:    v_mov_b32_e32 v4, v2
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v6, v8, v[3:4]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v7, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v5, v8, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v9, v6, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v10, v6, v[3:4]
+; SDAG-NEXT:    v_mad_i32_i24 v3, v9, v5, v3
+; SDAG-NEXT:  .LBB0_6: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB0_7: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
+; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
+; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v1, v2
+; SDAG-NEXT:  ; %bb.9: ; %Flow3
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  .LBB0_10: ; %fp-to-i-cleanup
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fptosi_f64_to_i128:
+; GISEL:       ; %bb.0: ; %fp-to-i-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v5, v1
+; GISEL-NEXT:    v_mov_b32_e32 v4, v0
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 20, v5
+; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_mov_b32_e32 v7, 0
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB0_10
+; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
+; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
+; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
+; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_7
+; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 3, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 4, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 5, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 6, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 7, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 8, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 9, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 10, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 11, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 12, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 13, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 14, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v20, 15, v0
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v19
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v20
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v20
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GISEL-NEXT:    v_lshl_or_b32 v10, v0, 16, v0
+; GISEL-NEXT:    v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT:    v_or3_b32 v9, v0, v2, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x433
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xfffff, v5
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    v_or_b32_e32 v5, 0x100000, v2
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_4
+; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
+; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v8, v[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
+; GISEL-NEXT:    v_mov_b32_e32 v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[8:9], v11, v9, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
+; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v9, v[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr10
+; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:  .LBB0_4: ; %Flow
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[16:17]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_6
+; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; GISEL-NEXT:    v_sub_co_u32_e32 v6, vcc, 0x433, v6
+; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, 0
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v4, v10, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v1, v5, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v4, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v5, v9, v[2:3]
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], vcc, v4, v9, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v5, v8, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v3, v4, s[6:7]
+; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:  .LBB0_6: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:  .LBB0_7: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_9
+; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 3, v1
+; GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 4, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 5, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 6, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 7, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 8, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 9, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 11, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 12, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v14, 13, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v15, 14, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v16, 15, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v13, v14
+; GISEL-NEXT:    v_or3_b32 v0, v0, v15, v16
+; GISEL-NEXT:    v_lshlrev_b32_e32 v17, 16, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v18, 17, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v15, v16
+; GISEL-NEXT:    v_or3_b32 v0, v0, v17, v18
+; GISEL-NEXT:    v_lshlrev_b32_e32 v19, 18, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v20, 19, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v17, v18
+; GISEL-NEXT:    v_or3_b32 v0, v0, v19, v20
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 20, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 21, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v19, v20
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 22, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 23, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 24, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 25, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 26, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 27, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 28, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 29, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 30, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v1
+; GISEL-NEXT:    v_or3_b32 v1, v2, v13, v1
+; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
+; GISEL-NEXT:    v_mov_b32_e32 v2, v1
+; GISEL-NEXT:  .LBB0_9: ; %Flow3
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  .LBB0_10: ; %fp-to-i-cleanup
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = fptosi double %x to i128
+  ret i128 %cvt
+}
+
+define i128 @fptoui_f64_to_i128(double %x) {
+; SDAG-LABEL: fptoui_f64_to_i128:
+; SDAG:       ; %bb.0: ; %fp-to-i-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v5, v1
+; SDAG-NEXT:    v_bfe_u32 v6, v5, 20, 11
+; SDAG-NEXT:    v_mov_b32_e32 v7, 0
+; SDAG-NEXT:    s_mov_b64 s[4:5], 0x3fe
+; SDAG-NEXT:    v_mov_b32_e32 v4, v0
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB1_10
+; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v7, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0xffffff7f
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB1_7
+; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT:    v_addc_co_u32_e64 v10, s[6:7], 0, -1, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0x432
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, -1, 1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB1_4
+; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0x473, v6
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xfffffb8d, v6
+; SDAG-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v1, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v1, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v12, v11, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG-NEXT:    v_mul_lo_u32 v13, v8, v2
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v11, v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, v1
+; SDAG-NEXT:    v_mul_lo_u32 v6, v11, v6
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v11, v[2:3]
+; SDAG-NEXT:    v_mul_lo_u32 v10, v10, v12
+; SDAG-NEXT:    v_add3_u32 v5, v5, v6, v13
+; SDAG-NEXT:    v_mov_b32_e32 v6, v2
+; SDAG-NEXT:    v_mov_b32_e32 v2, v3
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v12, v8, v[1:2]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v9, v12, v[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v5, vcc, v6, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v6, s[6:7], 0, 0, vcc
+; SDAG-NEXT:    v_mul_lo_u32 v9, v9, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v7, v8, v[5:6]
+; SDAG-NEXT:    ; implicit-def: $vgpr11
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    v_add3_u32 v4, v10, v4, v9
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v5, v3
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v6, v4, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:    ; implicit-def: $vgpr10
+; SDAG-NEXT:  .LBB1_4: ; %Flow
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[12:13], s[12:13]
+; SDAG-NEXT:    s_cbranch_execz .LBB1_6
+; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x433, v6
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, v0, v4, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, v1, v5, s[6:7]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v6, v11, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v5, v11, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v7, v4
+; SDAG-NEXT:    v_mov_b32_e32 v4, v2
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v6, v8, v[3:4]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v7, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v5, v8, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v9, v6, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v10, v6, v[3:4]
+; SDAG-NEXT:    v_mad_i32_i24 v3, v9, v5, v3
+; SDAG-NEXT:  .LBB1_6: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB1_7: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
+; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
+; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v1, v2
+; SDAG-NEXT:  ; %bb.9: ; %Flow3
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  .LBB1_10: ; %fp-to-i-cleanup
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fptoui_f64_to_i128:
+; GISEL:       ; %bb.0: ; %fp-to-i-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v5, v1
+; GISEL-NEXT:    v_mov_b32_e32 v4, v0
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 20, v5
+; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_mov_b32_e32 v7, 0
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB1_10
+; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
+; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
+; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
+; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB1_7
+; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 3, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 4, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 5, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 6, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 7, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 8, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 9, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 10, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 11, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 12, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 13, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 14, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v20, 15, v0
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v19
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v20
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v20
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GISEL-NEXT:    v_lshl_or_b32 v10, v0, 16, v0
+; GISEL-NEXT:    v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT:    v_or3_b32 v9, v0, v2, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x433
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xfffff, v5
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    v_or_b32_e32 v5, 0x100000, v2
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB1_4
+; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
+; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v8, v[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
+; GISEL-NEXT:    v_mov_b32_e32 v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[8:9], v11, v9, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
+; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v9, v[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr10
+; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:  .LBB1_4: ; %Flow
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[16:17]
+; GISEL-NEXT:    s_cbranch_execz .LBB1_6
+; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; GISEL-NEXT:    v_sub_co_u32_e32 v6, vcc, 0x433, v6
+; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, 0
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v4, v10, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v1, v5, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v4, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v5, v9, v[2:3]
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], vcc, v4, v9, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v5, v8, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v3, v4, s[6:7]
+; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:  .LBB1_6: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:  .LBB1_7: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_cbranch_execz .LBB1_9
+; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 3, v1
+; GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 4, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 5, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 6, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 7, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 8, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 9, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 11, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 12, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v14, 13, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v15, 14, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v16, 15, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v13, v14
+; GISEL-NEXT:    v_or3_b32 v0, v0, v15, v16
+; GISEL-NEXT:    v_lshlrev_b32_e32 v17, 16, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v18, 17, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v15, v16
+; GISEL-NEXT:    v_or3_b32 v0, v0, v17, v18
+; GISEL-NEXT:    v_lshlrev_b32_e32 v19, 18, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v20, 19, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v17, v18
+; GISEL-NEXT:    v_or3_b32 v0, v0, v19, v20
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 20, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 21, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v19, v20
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 22, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 23, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 24, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 25, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 26, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 27, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 28, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 29, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 30, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v1
+; GISEL-NEXT:    v_or3_b32 v1, v2, v13, v1
+; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
+; GISEL-NEXT:    v_mov_b32_e32 v2, v1
+; GISEL-NEXT:  .LBB1_9: ; %Flow3
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  .LBB1_10: ; %fp-to-i-cleanup
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = fptoui double %x to i128
+  ret i128 %cvt
+}
+
+define i128 @fptosi_f32_to_i128(float %x) {
+; SDAG-LABEL: fptosi_f32_to_i128:
+; SDAG:       ; %bb.0: ; %fp-to-i-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v4, v0
+; SDAG-NEXT:    v_bfe_u32 v5, v4, 23, 8
+; SDAG-NEXT:    s_movk_i32 s4, 0x7e
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB2_10
+; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0xffffff7f
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_7
+; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT:    v_addc_co_u32_e64 v11, s[6:7], 0, -1, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0x95
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v4
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[5:6]
+; SDAG-NEXT:    v_mov_b32_e32 v7, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v6, 0x800000, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_4
+; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0xd6, v5
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff2a, v5
+; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff6a, v5
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[6:7]
+; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v13, 0, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v13, v10, 0
+; SDAG-NEXT:    v_mul_lo_u32 v14, v8, v2
+; SDAG-NEXT:    v_mul_lo_u32 v15, v10, v3
+; SDAG-NEXT:    v_mov_b32_e32 v6, v1
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v12, v10, v[6:7]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v10, v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v6, v5
+; SDAG-NEXT:    v_mov_b32_e32 v5, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v13, v8, v[4:5]
+; SDAG-NEXT:    v_add3_u32 v3, v3, v15, v14
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v9, v13, v[2:3]
+; SDAG-NEXT:    v_add_co_u32_e32 v5, vcc, v6, v5
+; SDAG-NEXT:    v_addc_co_u32_e64 v6, s[6:7], 0, 0, vcc
+; SDAG-NEXT:    v_mul_lo_u32 v3, v9, v12
+; SDAG-NEXT:    v_mul_lo_u32 v7, v11, v13
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v12, v8, v[5:6]
+; SDAG-NEXT:    ; implicit-def: $vgpr10
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:    v_add3_u32 v3, v7, v2, v3
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v5, v1
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v6, v3, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr5_vgpr6
+; SDAG-NEXT:    v_mov_b32_e32 v1, v4
+; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT:  .LBB2_4: ; %Flow
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_6
+; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x96, v5
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v3, v0, v6, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[12:13], v3, v10, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[12:13], v3, v8, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v5
+; SDAG-NEXT:    v_mad_i64_i32 v[2:3], s[12:13], v9, v3, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v4
+; SDAG-NEXT:  .LBB2_6: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  .LBB2_7: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
+; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
+; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v1, v2
+; SDAG-NEXT:  ; %bb.9: ; %Flow3
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  .LBB2_10: ; %fp-to-i-cleanup
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fptosi_f32_to_i128:
+; GISEL:       ; %bb.0: ; %fp-to-i-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v4, v0
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 23, v[4:5]
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_bfe_u32 v6, v0, 0, 8
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7f
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_mov_b32_e32 v7, v5
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB2_10
+; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v6
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
+; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
+; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
+; GISEL-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_7
+; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v5, 3, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 15, v0
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v5
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v19
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GISEL-NEXT:    v_lshl_or_b32 v10, v0, 16, v0
+; GISEL-NEXT:    v_or3_b32 v9, v1, v2, 1
+; GISEL-NEXT:    v_or3_b32 v8, v0, v2, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x96
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    v_or_b32_e32 v4, 0x800000, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_4
+; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
+; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v9, v[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v9, 0
+; GISEL-NEXT:    v_mov_b32_e32 v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[8:9], v11, v8, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v9, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
+; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v8, v[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr10
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:  .LBB2_4: ; %Flow
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[16:17]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; GISEL-NEXT:    v_sub_co_u32_e32 v3, vcc, 0x96, v6
+; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v3, v[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[1:2], v2, 0
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v4, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v4, v10, 0
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], vcc, v4, v8, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GISEL-NEXT:  .LBB2_6: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  .LBB2_7: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_9
+; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 3, v1
+; GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 4, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 5, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 6, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 7, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 8, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 9, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 11, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 12, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v14, 13, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v15, 14, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v16, 15, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v13, v14
+; GISEL-NEXT:    v_or3_b32 v0, v0, v15, v16
+; GISEL-NEXT:    v_lshlrev_b32_e32 v17, 16, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v18, 17, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v15, v16
+; GISEL-NEXT:    v_or3_b32 v0, v0, v17, v18
+; GISEL-NEXT:    v_lshlrev_b32_e32 v19, 18, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v20, 19, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v17, v18
+; GISEL-NEXT:    v_or3_b32 v0, v0, v19, v20
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 20, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 21, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v19, v20
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 22, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 23, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 24, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 25, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 26, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 27, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 28, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 29, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 30, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v1
+; GISEL-NEXT:    v_or3_b32 v1, v2, v13, v1
+; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
+; GISEL-NEXT:    v_mov_b32_e32 v2, v1
+; GISEL-NEXT:  .LBB2_9: ; %Flow3
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  .LBB2_10: ; %fp-to-i-cleanup
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = fptosi float %x to i128
+  ret i128 %cvt
+}
+
+define i128 @fptoui_f32_to_i128(float %x) {
+; SDAG-LABEL: fptoui_f32_to_i128:
+; SDAG:       ; %bb.0: ; %fp-to-i-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v4, v0
+; SDAG-NEXT:    v_bfe_u32 v5, v4, 23, 8
+; SDAG-NEXT:    s_movk_i32 s4, 0x7e
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB3_10
+; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0xffffff7f
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB3_7
+; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT:    v_addc_co_u32_e64 v11, s[6:7], 0, -1, vcc
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0x95
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v4
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[5:6]
+; SDAG-NEXT:    v_mov_b32_e32 v7, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v6, 0x800000, v0
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB3_4
+; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0xd6, v5
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff2a, v5
+; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff6a, v5
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[6:7]
+; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v13, 0, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v13, v10, 0
+; SDAG-NEXT:    v_mul_lo_u32 v14, v8, v2
+; SDAG-NEXT:    v_mul_lo_u32 v15, v10, v3
+; SDAG-NEXT:    v_mov_b32_e32 v6, v1
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v12, v10, v[6:7]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v10, v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v6, v5
+; SDAG-NEXT:    v_mov_b32_e32 v5, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v13, v8, v[4:5]
+; SDAG-NEXT:    v_add3_u32 v3, v3, v15, v14
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v9, v13, v[2:3]
+; SDAG-NEXT:    v_add_co_u32_e32 v5, vcc, v6, v5
+; SDAG-NEXT:    v_addc_co_u32_e64 v6, s[6:7], 0, 0, vcc
+; SDAG-NEXT:    v_mul_lo_u32 v3, v9, v12
+; SDAG-NEXT:    v_mul_lo_u32 v7, v11, v13
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v12, v8, v[5:6]
+; SDAG-NEXT:    ; implicit-def: $vgpr10
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:    v_add3_u32 v3, v7, v2, v3
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v5, v1
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v6, v3, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr5_vgpr6
+; SDAG-NEXT:    v_mov_b32_e32 v1, v4
+; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT:  .LBB3_4: ; %Flow
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
+; SDAG-NEXT:    s_cbranch_execz .LBB3_6
+; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x96, v5
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v3, v0, v6, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[12:13], v3, v10, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[12:13], v3, v8, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v5
+; SDAG-NEXT:    v_mad_i64_i32 v[2:3], s[12:13], v9, v3, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v4
+; SDAG-NEXT:  .LBB3_6: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  .LBB3_7: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
+; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
+; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v1, v2
+; SDAG-NEXT:  ; %bb.9: ; %Flow3
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  .LBB3_10: ; %fp-to-i-cleanup
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fptoui_f32_to_i128:
+; GISEL:       ; %bb.0: ; %fp-to-i-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v4, v0
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 23, v[4:5]
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_bfe_u32 v6, v0, 0, 8
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7f
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_mov_b32_e32 v7, v5
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GISEL-NEXT:    v_mov_b32_e32 v3, s7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB3_10
+; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-end
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v6
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
+; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
+; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
+; GISEL-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB3_7
+; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-end9
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v5, 3, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 15, v0
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v5
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v19
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GISEL-NEXT:    v_lshl_or_b32 v10, v0, 16, v0
+; GISEL-NEXT:    v_or3_b32 v9, v1, v2, 1
+; GISEL-NEXT:    v_or3_b32 v8, v0, v2, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x96
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    v_or_b32_e32 v4, 0x800000, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB3_4
+; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
+; GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
+; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v9, v[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v9, 0
+; GISEL-NEXT:    v_mov_b32_e32 v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[8:9], v11, v8, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v9, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
+; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v13, v8, v[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr10
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7]
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:  .LBB3_4: ; %Flow
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[16:17]
+; GISEL-NEXT:    s_cbranch_execz .LBB3_6
+; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
+; GISEL-NEXT:    v_sub_co_u32_e32 v3, vcc, 0x96, v6
+; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v3, v[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[1:2], v2, 0
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v4, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v4, v10, 0
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], vcc, v4, v8, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GISEL-NEXT:  .LBB3_6: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  .LBB3_7: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_cbranch_execz .LBB3_9
+; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-then5
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 3, v1
+; GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 4, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 5, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 6, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 7, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 8, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 9, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 11, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 12, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v14, 13, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v15, 14, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v16, 15, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v13, v14
+; GISEL-NEXT:    v_or3_b32 v0, v0, v15, v16
+; GISEL-NEXT:    v_lshlrev_b32_e32 v17, 16, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v18, 17, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v15, v16
+; GISEL-NEXT:    v_or3_b32 v0, v0, v17, v18
+; GISEL-NEXT:    v_lshlrev_b32_e32 v19, 18, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v20, 19, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v17, v18
+; GISEL-NEXT:    v_or3_b32 v0, v0, v19, v20
+; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 20, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 21, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v19, v20
+; GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
+; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 22, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 23, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
+; GISEL-NEXT:    v_or3_b32 v0, v0, v5, v6
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 24, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 25, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v5, v6
+; GISEL-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 26, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v10, 27, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v7, v8
+; GISEL-NEXT:    v_or3_b32 v0, v0, v9, v10
+; GISEL-NEXT:    v_lshlrev_b32_e32 v11, 28, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v12, 29, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v9, v10
+; GISEL-NEXT:    v_or3_b32 v0, v0, v11, v12
+; GISEL-NEXT:    v_lshlrev_b32_e32 v13, 30, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
+; GISEL-NEXT:    v_or3_b32 v2, v2, v11, v12
+; GISEL-NEXT:    v_or3_b32 v0, v0, v13, v1
+; GISEL-NEXT:    v_or3_b32 v1, v2, v13, v1
+; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
+; GISEL-NEXT:    v_mov_b32_e32 v2, v1
+; GISEL-NEXT:  .LBB3_9: ; %Flow3
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  .LBB3_10: ; %fp-to-i-cleanup
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = fptoui float %x to i128
+  ret i128 %cvt
+}
+
+define i128 @fptosi_f16_to_i128(half %x) {
+; GCN-LABEL: fptosi_f16_to_i128:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = fptosi half %x to i128
+  ret i128 %cvt
+}
+
+define i128 @fptoui_f16_to_i128(half %x) {
+; GCN-LABEL: fptoui_f16_to_i128:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = fptoui half %x to i128
+  ret i128 %cvt
+}
+
+; FIXME: ExpandLargeFpConvert asserts on bfloat
+; define i128 @fptosi_bf16_to_i128(bfloat %x) {
+;   %cvt = fptosi bfloat %x to i128
+;   ret i128 %cvt
+; }
+
+; define i128 @fptoui_bf16_to_i128(bfloat %x) {
+;   %cvt = fptoui bfloat %x to i128
+;   ret i128 %cvt
+; }
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
new file mode 100644
index 0000000..bfeb214
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -0,0 +1,1618 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GISEL %s
+
+define float @sitofp_i128_to_f32(i128 %x) {
+; SDAG-LABEL: sitofp_i128_to_f32:
+; SDAG:       ; %bb.0: ; %itofp-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_or_b32_e32 v5, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v4, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB0_14
+; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
+; SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; SDAG-NEXT:    v_xor_b32_e32 v0, v5, v0
+; SDAG-NEXT:    v_xor_b32_e32 v1, v5, v1
+; SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v5
+; SDAG-NEXT:    v_xor_b32_e32 v2, v5, v2
+; SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
+; SDAG-NEXT:    v_xor_b32_e32 v6, v5, v3
+; SDAG-NEXT:    v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
+; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, v6, v5, vcc
+; SDAG-NEXT:    v_ffbh_u32_e32 v2, v4
+; SDAG-NEXT:    v_add_u32_e32 v2, 32, v2
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v5
+; SDAG-NEXT:    v_min_u32_e32 v2, v2, v6
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v0
+; SDAG-NEXT:    v_add_u32_e32 v6, 32, v6
+; SDAG-NEXT:    v_ffbh_u32_e32 v7, v1
+; SDAG-NEXT:    v_min_u32_e32 v6, v6, v7
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_add_u32_e32 v6, 64, v6
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v6, v2, vcc
+; SDAG-NEXT:    v_sub_u32_e32 v6, 0x80, v7
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x7f, v7
+; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v6
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff98, v7
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr6
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_13
+; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v6
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_8
+; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v6
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB0_7
+; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    v_sub_u32_e32 v12, 0x66, v7
+; SDAG-NEXT:    v_sub_u32_e32 v10, 64, v12
+; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v12, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[10:11], v10, v[4:5]
+; SDAG-NEXT:    v_sub_u32_e32 v13, 38, v7
+; SDAG-NEXT:    v_or_b32_e32 v11, v9, v11
+; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
+; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v13, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; SDAG-NEXT:    v_add_u32_e32 v14, 26, v7
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[10:11], v13, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[12:13], v14, v[4:5]
+; SDAG-NEXT:    v_subrev_u32_e32 v7, 38, v7
+; SDAG-NEXT:    v_cndmask_b32_e64 v15, v8, v0, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[7:8], v7, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v11, v13, v11
+; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, v8, v5, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v7, v4, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, v1, v5
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v8, v15, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, v8
+; SDAG-NEXT:    v_mov_b32_e32 v1, v9
+; SDAG-NEXT:  .LBB0_7: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB0_8: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
+; SDAG-NEXT:    v_and_or_b32 v0, v4, 1, v0
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; SDAG-NEXT:    v_and_b32_e32 v4, 0x4000000, v0
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 2
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v2, v6
+; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB0_13: ; %Flow4
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x80000000, v3
+; SDAG-NEXT:    v_lshl_add_u32 v1, v2, 23, 1.0
+; SDAG-NEXT:    v_and_b32_e32 v2, 0x7fffff, v8
+; SDAG-NEXT:    v_or3_b32 v4, v2, v0, v1
+; SDAG-NEXT:  .LBB0_14: ; %Flow5
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v4
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: sitofp_i128_to_f32:
+; GISEL:       ; %bb.0: ; %itofp-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT:    s_mov_b32 s4, 0
+; GISEL-NEXT:    v_mov_b32_e32 v4, s4
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB0_14
+; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v0
+; GISEL-NEXT:    v_xor_b32_e32 v1, v6, v1
+; GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v2, v6, v2
+; GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v3, v6, v3
+; GISEL-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
+; GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
+; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v7, v2
+; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_add_u32_e32 v7, 32, v7
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
+; GISEL-NEXT:    v_min_u32_e32 v5, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v8, 0x80, v5
+; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v5
+; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v8
+; GISEL-NEXT:    ; implicit-def: $vgpr4
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_13
+; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_8
+; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB0_7
+; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v4
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v4, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, v[2:3]
+; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v4
+; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
+; GISEL-NEXT:    v_or_b32_e32 v12, v10, v12
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v13, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v9, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v1, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v5, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v14, 64, v5
+; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
+; GISEL-NEXT:    v_or_b32_e32 v16, v10, v12
+; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v14, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
+; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v11, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_or_b32_e32 v3, v13, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-NEXT:    v_mov_b32_e32 v1, v4
+; GISEL-NEXT:    v_mov_b32_e32 v2, v5
+; GISEL-NEXT:    v_mov_b32_e32 v3, v6
+; GISEL-NEXT:  .LBB0_7: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:  .LBB0_8: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x4000000, v0
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v7, v8
+; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  .LBB0_13: ; %Flow4
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x80000000, v6
+; GISEL-NEXT:    v_lshl_add_u32 v1, v7, 23, 1.0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
+; GISEL-NEXT:    v_or3_b32 v4, v2, v0, v1
+; GISEL-NEXT:  .LBB0_14: ; %Flow5
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    v_mov_b32_e32 v0, v4
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = sitofp i128 %x to float
+  ret float %cvt
+}
+
+define float @uitofp_i128_to_f32(i128 %x) {
+; SDAG-LABEL: uitofp_i128_to_f32:
+; SDAG:       ; %bb.0: ; %itofp-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_or_b32_e32 v5, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v4, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB1_14
+; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
+; SDAG-NEXT:    v_ffbh_u32_e32 v4, v2
+; SDAG-NEXT:    v_add_u32_e32 v4, 32, v4
+; SDAG-NEXT:    v_ffbh_u32_e32 v5, v3
+; SDAG-NEXT:    v_min_u32_e32 v4, v4, v5
+; SDAG-NEXT:    v_ffbh_u32_e32 v5, v0
+; SDAG-NEXT:    v_add_u32_e32 v5, 32, v5
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v1
+; SDAG-NEXT:    v_min_u32_e32 v5, v5, v6
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_add_u32_e32 v5, 64, v5
+; SDAG-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; SDAG-NEXT:    v_sub_u32_e32 v5, 0x80, v6
+; SDAG-NEXT:    v_sub_u32_e32 v4, 0x7f, v6
+; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v5
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr5
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr6
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB1_13
+; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v5
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB1_8
+; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v5
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB1_7
+; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    v_sub_u32_e32 v11, 0x66, v6
+; SDAG-NEXT:    v_sub_u32_e32 v9, 64, v11
+; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v11, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[9:10], v9, v[2:3]
+; SDAG-NEXT:    v_sub_u32_e32 v12, 38, v6
+; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
+; SDAG-NEXT:    v_or_b32_e32 v9, v7, v9
+; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v12, v[2:3]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; SDAG-NEXT:    v_add_u32_e32 v13, 26, v6
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[9:10], v12, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[11:12], v13, v[2:3]
+; SDAG-NEXT:    v_subrev_u32_e32 v6, 38, v6
+; SDAG-NEXT:    v_cndmask_b32_e64 v14, v7, v0, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[6:7], v6, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
+; SDAG-NEXT:    v_or_b32_e32 v9, v11, v9
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v13, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
+; SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v7, v14, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, v7
+; SDAG-NEXT:    v_mov_b32_e32 v1, v8
+; SDAG-NEXT:  .LBB1_7: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB1_8: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
+; SDAG-NEXT:    v_and_or_b32 v0, v2, 1, v0
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; SDAG-NEXT:    v_and_b32_e32 v2, 0x4000000, v0
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 2
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v4, v5
+; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB1_13: ; %Flow4
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v7
+; SDAG-NEXT:    v_lshl_or_b32 v0, v4, 23, v0
+; SDAG-NEXT:    v_add_u32_e32 v4, 1.0, v0
+; SDAG-NEXT:  .LBB1_14: ; %Flow5
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v4
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: uitofp_i128_to_f32:
+; GISEL:       ; %bb.0: ; %itofp-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT:    s_mov_b32 s4, 0
+; GISEL-NEXT:    v_mov_b32_e32 v4, s4
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB1_14
+; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
+; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
+; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v6, v2
+; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_add_u32_e32 v6, 32, v6
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
+; GISEL-NEXT:    v_min_u32_e32 v5, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v7, 0x80, v5
+; GISEL-NEXT:    v_sub_u32_e32 v6, 0x7f, v5
+; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v7
+; GISEL-NEXT:    ; implicit-def: $vgpr4
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr7
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB1_13
+; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v7
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB1_8
+; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB1_7
+; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v4
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v4, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
+; GISEL-NEXT:    v_subrev_u32_e32 v12, 64, v4
+; GISEL-NEXT:    v_or_b32_e32 v10, v8, v10
+; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v12, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v8, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v1, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v5, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v5
+; GISEL-NEXT:    v_or_b32_e32 v14, v8, v10
+; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
+; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v13, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v14, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v10, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v2, v8, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, v9, v3
+; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v10, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_or_b32_e32 v3, v12, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-NEXT:    v_mov_b32_e32 v1, v4
+; GISEL-NEXT:    v_mov_b32_e32 v2, v5
+; GISEL-NEXT:    v_mov_b32_e32 v3, v6
+; GISEL-NEXT:  .LBB1_7: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:  .LBB1_8: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x4000000, v0
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v6, v7
+; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  .LBB1_13: ; %Flow4
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:    v_lshl_add_u32 v0, v6, 23, 1.0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fffff
+; GISEL-NEXT:    v_and_or_b32 v4, v4, v1, v0
+; GISEL-NEXT:  .LBB1_14: ; %Flow5
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    v_mov_b32_e32 v0, v4
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = uitofp i128 %x to float
+  ret float %cvt
+}
+
+define double @sitofp_i128_to_f64(i128 %x) {
+; SDAG-LABEL: sitofp_i128_to_f64:
+; SDAG:       ; %bb.0: ; %itofp-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v5, v1
+; SDAG-NEXT:    v_mov_b32_e32 v4, v0
+; SDAG-NEXT:    v_or_b32_e32 v1, v5, v3
+; SDAG-NEXT:    v_or_b32_e32 v0, v4, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB2_14
+; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
+; SDAG-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
+; SDAG-NEXT:    v_xor_b32_e32 v4, v0, v4
+; SDAG-NEXT:    v_xor_b32_e32 v5, v0, v5
+; SDAG-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v0
+; SDAG-NEXT:    v_xor_b32_e32 v2, v0, v2
+; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, v5, v0, vcc
+; SDAG-NEXT:    v_xor_b32_e32 v1, v0, v3
+; SDAG-NEXT:    v_subb_co_u32_e32 v6, vcc, v2, v0, vcc
+; SDAG-NEXT:    v_subb_co_u32_e32 v7, vcc, v1, v0, vcc
+; SDAG-NEXT:    v_ffbh_u32_e32 v0, v6
+; SDAG-NEXT:    v_add_u32_e32 v0, 32, v0
+; SDAG-NEXT:    v_ffbh_u32_e32 v1, v7
+; SDAG-NEXT:    v_min_u32_e32 v0, v0, v1
+; SDAG-NEXT:    v_ffbh_u32_e32 v1, v4
+; SDAG-NEXT:    v_add_u32_e32 v1, 32, v1
+; SDAG-NEXT:    v_ffbh_u32_e32 v2, v5
+; SDAG-NEXT:    v_min_u32_e32 v1, v1, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; SDAG-NEXT:    v_add_u32_e32 v1, 64, v1
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, v1, v0, vcc
+; SDAG-NEXT:    v_sub_u32_e32 v8, 0x80, v9
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x7f, v9
+; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 54, v8
+; SDAG-NEXT:    ; implicit-def: $vgpr10
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v6, 0xffffffb5, v9
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_13
+; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 54, v8
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_8
+; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v8
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB2_7
+; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    v_sub_u32_e32 v12, 0x49, v9
+; SDAG-NEXT:    v_sub_u32_e32 v10, 64, v12
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v12, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[10:11], v10, v[6:7]
+; SDAG-NEXT:    v_sub_u32_e32 v13, 9, v9
+; SDAG-NEXT:    v_or_b32_e32 v11, v1, v11
+; SDAG-NEXT:    v_or_b32_e32 v10, v0, v10
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v13, v[6:7]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; SDAG-NEXT:    v_add_u32_e32 v16, 55, v9
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[10:11], v12, v[6:7]
+; SDAG-NEXT:    v_lshrrev_b64 v[12:13], v13, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[14:15], v16, v[6:7]
+; SDAG-NEXT:    v_add_u32_e32 v9, -9, v9
+; SDAG-NEXT:    v_or_b32_e32 v15, v15, v13
+; SDAG-NEXT:    v_or_b32_e32 v14, v14, v12
+; SDAG-NEXT:    v_lshlrev_b64 v[12:13], v9, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, v13, v15, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
+; SDAG-NEXT:    v_lshlrev_b64 v[4:5], v16, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, v9, v7, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, v12, v14, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, v9, v6, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; SDAG-NEXT:    v_or_b32_e32 v5, v5, v7
+; SDAG-NEXT:    v_or_b32_e32 v4, v4, v6
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v6, v10
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
+; SDAG-NEXT:    v_mov_b32_e32 v5, v1
+; SDAG-NEXT:    v_mov_b32_e32 v4, v0
+; SDAG-NEXT:    v_mov_b32_e32 v7, v11
+; SDAG-NEXT:  .LBB2_7: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB2_8: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v0, 31, v5
+; SDAG-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v6, v6, v0
+; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v0, 2, v4
+; SDAG-NEXT:    v_and_or_b32 v0, v0, 1, v4
+; SDAG-NEXT:    v_add_co_u32_e32 v4, vcc, 1, v0
+; SDAG-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], 2, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b32_e32 v7, 30, v6
+; SDAG-NEXT:    v_or_b32_e32 v10, v1, v7
+; SDAG-NEXT:    v_and_b32_e32 v1, 0x800000, v5
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], 3, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 29, v6
+; SDAG-NEXT:    v_or_b32_e32 v10, v1, v2
+; SDAG-NEXT:    v_mov_b32_e32 v2, v8
+; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB2_13: ; %Flow4
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    v_and_b32_e32 v1, 0x80000000, v3
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0x3ff00000
+; SDAG-NEXT:    v_lshl_add_u32 v2, v2, 20, v3
+; SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff, v10
+; SDAG-NEXT:    v_or3_b32 v1, v3, v1, v2
+; SDAG-NEXT:  .LBB2_14: ; %Flow5
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: sitofp_i128_to_f64:
+; GISEL:       ; %bb.0: ; %itofp-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v4, v0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v1
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_or_b32_e32 v0, v4, v2
+; GISEL-NEXT:    v_or_b32_e32 v1, v5, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB2_14
+; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v6, v5
+; GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v2, v6, v2
+; GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v3, v6, v3
+; GISEL-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
+; GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
+; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v7, v2
+; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_add_u32_e32 v7, 32, v7
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
+; GISEL-NEXT:    v_min_u32_e32 v5, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v5, v4, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v8, 0x80, v9
+; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v9
+; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 53, v8
+; GISEL-NEXT:    ; implicit-def: $vgpr10
+; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v9
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_13
+; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 55, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_8
+; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB2_7
+; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    v_sub_u32_e32 v14, 0x49, v9
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v14
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v14, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
+; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v14
+; GISEL-NEXT:    v_or_b32_e32 v10, v4, v10
+; GISEL-NEXT:    v_or_b32_e32 v11, v5, v11
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v15, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v14, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT:    v_add_u32_e32 v14, 55, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v4, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v5, v1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, 0, v12, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v14, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v14
+; GISEL-NEXT:    v_or_b32_e32 v16, v9, v11
+; GISEL-NEXT:    v_or_b32_e32 v17, v10, v12
+; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v15, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v17, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
+; GISEL-NEXT:    v_and_or_b32 v0, v11, v0, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v12, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_or_b32_e32 v3, v13, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-NEXT:    v_mov_b32_e32 v1, v4
+; GISEL-NEXT:    v_mov_b32_e32 v2, v5
+; GISEL-NEXT:    v_mov_b32_e32 v3, v6
+; GISEL-NEXT:  .LBB2_7: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:  .LBB2_8: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    v_lshlrev_b64 v[9:10], 1, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
+; GISEL-NEXT:    v_or_b32_e32 v11, v2, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v9
+; GISEL-NEXT:    v_mov_b32_e32 v1, v10
+; GISEL-NEXT:    v_mov_b32_e32 v2, v11
+; GISEL-NEXT:    v_mov_b32_e32 v3, v12
+; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    v_bfe_u32 v3, v0, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v9, 0
+; GISEL-NEXT:    v_and_b32_e32 v10, 0x800000, v1
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
+; GISEL-NEXT:    v_lshl_or_b32 v10, v2, 30, v5
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v7, v8
+; GISEL-NEXT:    v_lshl_or_b32 v10, v2, 29, v5
+; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  .LBB2_13: ; %Flow4
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x80000000, v6
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff00000
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0xfffff
+; GISEL-NEXT:    v_lshl_add_u32 v1, v7, 20, v1
+; GISEL-NEXT:    v_and_or_b32 v2, v10, v2, v0
+; GISEL-NEXT:    v_and_or_b32 v0, v4, -1, 0
+; GISEL-NEXT:    v_or3_b32 v1, v2, v1, 0
+; GISEL-NEXT:  .LBB2_14: ; %Flow5
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = sitofp i128 %x to double
+  ret double %cvt
+}
+
+define double @uitofp_i128_to_f64(i128 %x) {
+; SDAG-LABEL: uitofp_i128_to_f64:
+; SDAG:       ; %bb.0: ; %itofp-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_or_b32_e32 v5, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v4, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; SDAG-NEXT:    v_mov_b32_e32 v5, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB3_14
+; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
+; SDAG-NEXT:    v_ffbh_u32_e32 v4, v2
+; SDAG-NEXT:    v_add_u32_e32 v4, 32, v4
+; SDAG-NEXT:    v_ffbh_u32_e32 v5, v3
+; SDAG-NEXT:    v_min_u32_e32 v4, v4, v5
+; SDAG-NEXT:    v_ffbh_u32_e32 v5, v0
+; SDAG-NEXT:    v_add_u32_e32 v5, 32, v5
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v1
+; SDAG-NEXT:    v_min_u32_e32 v5, v5, v6
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_add_u32_e32 v5, 64, v5
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v5, v4, vcc
+; SDAG-NEXT:    v_sub_u32_e32 v7, 0x80, v8
+; SDAG-NEXT:    v_sub_u32_e32 v6, 0x7f, v8
+; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 54, v7
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB3_13
+; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 54, v7
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB3_8
+; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v7
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB3_7
+; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    v_sub_u32_e32 v11, 0x49, v8
+; SDAG-NEXT:    v_sub_u32_e32 v9, 64, v11
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v11, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[9:10], v9, v[2:3]
+; SDAG-NEXT:    v_sub_u32_e32 v12, 9, v8
+; SDAG-NEXT:    v_or_b32_e32 v10, v5, v10
+; SDAG-NEXT:    v_or_b32_e32 v9, v4, v9
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v12, v[2:3]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; SDAG-NEXT:    v_add_u32_e32 v15, 55, v8
+; SDAG-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[9:10], v11, v[2:3]
+; SDAG-NEXT:    v_lshrrev_b64 v[11:12], v12, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[13:14], v15, v[2:3]
+; SDAG-NEXT:    v_add_u32_e32 v8, -9, v8
+; SDAG-NEXT:    v_or_b32_e32 v14, v14, v12
+; SDAG-NEXT:    v_or_b32_e32 v13, v13, v11
+; SDAG-NEXT:    v_lshlrev_b64 v[11:12], v8, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, v5, v1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v12, v14, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v15, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v11, v13, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, v8, v2, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_mov_b32_e32 v2, v9
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v4, v4, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, v4
+; SDAG-NEXT:    v_mov_b32_e32 v1, v5
+; SDAG-NEXT:    v_mov_b32_e32 v3, v10
+; SDAG-NEXT:  .LBB3_7: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB3_8: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 31, v1
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; SDAG-NEXT:    v_or_b32_e32 v2, v2, v3
+; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 2, v0
+; SDAG-NEXT:    v_and_or_b32 v0, v3, 1, v0
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; SDAG-NEXT:    v_and_b32_e32 v3, 0x800000, v1
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; SDAG-NEXT:    v_alignbit_b32 v9, v2, v1, 2
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; SDAG-NEXT:    v_alignbit_b32 v9, v2, v1, 3
+; SDAG-NEXT:    v_mov_b32_e32 v6, v7
+; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB3_13: ; %Flow4
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v9
+; SDAG-NEXT:    v_lshl_or_b32 v0, v6, 20, v0
+; SDAG-NEXT:    v_add_u32_e32 v5, 0x3ff00000, v0
+; SDAG-NEXT:  .LBB3_14: ; %Flow5
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v4
+; SDAG-NEXT:    v_mov_b32_e32 v1, v5
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: uitofp_i128_to_f64:
+; GISEL:       ; %bb.0: ; %itofp-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v4, s4
+; GISEL-NEXT:    v_mov_b32_e32 v5, s5
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB3_14
+; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
+; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
+; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v6, v2
+; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_add_u32_e32 v6, 32, v6
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
+; GISEL-NEXT:    v_min_u32_e32 v5, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v5, v4, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v7, 0x80, v8
+; GISEL-NEXT:    v_sub_u32_e32 v6, 0x7f, v8
+; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 53, v7
+; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr7
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB3_13
+; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 55, v7
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB3_8
+; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB3_7
+; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    v_sub_u32_e32 v13, 0x49, v8
+; GISEL-NEXT:    v_sub_u32_e32 v9, 64, v13
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v13, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[9:10], v9, v[2:3]
+; GISEL-NEXT:    v_subrev_u32_e32 v14, 64, v13
+; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v13, v[2:3]
+; GISEL-NEXT:    v_or_b32_e32 v9, v4, v9
+; GISEL-NEXT:    v_or_b32_e32 v10, v5, v10
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v14, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT:    v_add_u32_e32 v8, 55, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v11, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v12, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v12, 64, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, v4, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v5, v1, s[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v8, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[12:13], v12, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v8
+; GISEL-NEXT:    v_or_b32_e32 v16, v4, v12
+; GISEL-NEXT:    v_or_b32_e32 v17, v5, v13
+; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v15, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v12, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v2, v4, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, v5, v3
+; GISEL-NEXT:    v_and_or_b32 v0, v8, v0, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v12, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_or_b32_e32 v8, v14, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v8
+; GISEL-NEXT:    v_mov_b32_e32 v1, v9
+; GISEL-NEXT:    v_mov_b32_e32 v2, v10
+; GISEL-NEXT:    v_mov_b32_e32 v3, v11
+; GISEL-NEXT:  .LBB3_7: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:  .LBB3_8: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], 1, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
+; GISEL-NEXT:    v_or_b32_e32 v10, v10, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v8
+; GISEL-NEXT:    v_mov_b32_e32 v1, v9
+; GISEL-NEXT:    v_mov_b32_e32 v2, v10
+; GISEL-NEXT:    v_mov_b32_e32 v3, v11
+; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    v_bfe_u32 v4, v0, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v8, 0
+; GISEL-NEXT:    v_and_b32_e32 v9, 0x800000, v1
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
+; GISEL-NEXT:    v_lshlrev_b64 v[8:9], 30, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 2, v1
+; GISEL-NEXT:    v_or_b32_e32 v9, v5, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshlrev_b64 v[2:3], 29, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 3, v1
+; GISEL-NEXT:    v_or_b32_e32 v9, v0, v2
+; GISEL-NEXT:    v_mov_b32_e32 v6, v7
+; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  .LBB3_13: ; %Flow4
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff00000
+; GISEL-NEXT:    v_lshl_add_u32 v0, v6, 20, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff, v9
+; GISEL-NEXT:    v_and_or_b32 v4, v4, -1, 0
+; GISEL-NEXT:    v_or3_b32 v5, v1, v0, 0
+; GISEL-NEXT:  .LBB3_14: ; %Flow5
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    v_mov_b32_e32 v0, v4
+; GISEL-NEXT:    v_mov_b32_e32 v1, v5
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = uitofp i128 %x to double
+  ret double %cvt
+}
+
+define half @sitofp_i128_to_f16(i128 %x) {
+; SDAG-LABEL: sitofp_i128_to_f16:
+; SDAG:       ; %bb.0: ; %itofp-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_or_b32_e32 v5, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v4, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB4_14
+; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
+; SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; SDAG-NEXT:    v_xor_b32_e32 v0, v5, v0
+; SDAG-NEXT:    v_xor_b32_e32 v1, v5, v1
+; SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v5
+; SDAG-NEXT:    v_xor_b32_e32 v2, v5, v2
+; SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
+; SDAG-NEXT:    v_xor_b32_e32 v6, v5, v3
+; SDAG-NEXT:    v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
+; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, v6, v5, vcc
+; SDAG-NEXT:    v_ffbh_u32_e32 v2, v4
+; SDAG-NEXT:    v_add_u32_e32 v2, 32, v2
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v5
+; SDAG-NEXT:    v_min_u32_e32 v2, v2, v6
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v0
+; SDAG-NEXT:    v_add_u32_e32 v6, 32, v6
+; SDAG-NEXT:    v_ffbh_u32_e32 v7, v1
+; SDAG-NEXT:    v_min_u32_e32 v6, v6, v7
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_add_u32_e32 v6, 64, v6
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v6, v2, vcc
+; SDAG-NEXT:    v_sub_u32_e32 v6, 0x80, v7
+; SDAG-NEXT:    v_sub_u32_e32 v2, 0x7f, v7
+; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v6
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff98, v7
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr6
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB4_13
+; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v6
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB4_8
+; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v6
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB4_7
+; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    v_sub_u32_e32 v12, 0x66, v7
+; SDAG-NEXT:    v_sub_u32_e32 v10, 64, v12
+; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v12, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[10:11], v10, v[4:5]
+; SDAG-NEXT:    v_sub_u32_e32 v13, 38, v7
+; SDAG-NEXT:    v_or_b32_e32 v11, v9, v11
+; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
+; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v13, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; SDAG-NEXT:    v_add_u32_e32 v14, 26, v7
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[10:11], v13, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[12:13], v14, v[4:5]
+; SDAG-NEXT:    v_subrev_u32_e32 v7, 38, v7
+; SDAG-NEXT:    v_cndmask_b32_e64 v15, v8, v0, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[7:8], v7, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v11, v13, v11
+; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, v8, v5, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v7, v4, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, v1, v5
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v8, v15, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, v8
+; SDAG-NEXT:    v_mov_b32_e32 v1, v9
+; SDAG-NEXT:  .LBB4_7: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB4_8: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
+; SDAG-NEXT:    v_and_or_b32 v0, v4, 1, v0
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; SDAG-NEXT:    v_and_b32_e32 v4, 0x4000000, v0
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 2
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v2, v6
+; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB4_13: ; %Flow4
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x80000000, v3
+; SDAG-NEXT:    v_lshl_add_u32 v1, v2, 23, 1.0
+; SDAG-NEXT:    v_and_b32_e32 v2, 0x7fffff, v8
+; SDAG-NEXT:    v_or3_b32 v0, v2, v0, v1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v0
+; SDAG-NEXT:  .LBB4_14: ; %Flow5
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v4
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: sitofp_i128_to_f16:
+; GISEL:       ; %bb.0: ; %itofp-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
+; GISEL-NEXT:    s_mov_b32 s4, 0
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v4, s4
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB4_14
+; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v0
+; GISEL-NEXT:    v_xor_b32_e32 v1, v6, v1
+; GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v2, v6, v2
+; GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v3, v6, v3
+; GISEL-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
+; GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
+; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v7, v2
+; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_add_u32_e32 v7, 32, v7
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
+; GISEL-NEXT:    v_min_u32_e32 v5, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v8, 0x80, v5
+; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v5
+; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v8
+; GISEL-NEXT:    ; implicit-def: $vgpr4
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB4_13
+; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB4_8
+; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v8
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB4_7
+; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v4
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v4, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, v[2:3]
+; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v4
+; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
+; GISEL-NEXT:    v_or_b32_e32 v12, v10, v12
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v13, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v9, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v1, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v5, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v14, 64, v5
+; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
+; GISEL-NEXT:    v_or_b32_e32 v16, v10, v12
+; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v14, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
+; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v11, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_or_b32_e32 v3, v13, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-NEXT:    v_mov_b32_e32 v1, v4
+; GISEL-NEXT:    v_mov_b32_e32 v2, v5
+; GISEL-NEXT:    v_mov_b32_e32 v3, v6
+; GISEL-NEXT:  .LBB4_7: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:  .LBB4_8: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x4000000, v0
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v7, v8
+; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  .LBB4_13: ; %Flow4
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x80000000, v6
+; GISEL-NEXT:    v_lshl_add_u32 v1, v7, 23, 1.0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
+; GISEL-NEXT:    v_or3_b32 v0, v2, v0, v1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v0
+; GISEL-NEXT:  .LBB4_14: ; %Flow5
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    v_mov_b32_e32 v0, v4
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = sitofp i128 %x to half
+  ret half %cvt
+}
+
+define half @uitofp_i128_to_f16(i128 %x) {
+; SDAG-LABEL: uitofp_i128_to_f16:
+; SDAG:       ; %bb.0: ; %itofp-entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_or_b32_e32 v5, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v4, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB5_14
+; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
+; SDAG-NEXT:    v_ffbh_u32_e32 v4, v2
+; SDAG-NEXT:    v_add_u32_e32 v4, 32, v4
+; SDAG-NEXT:    v_ffbh_u32_e32 v5, v3
+; SDAG-NEXT:    v_min_u32_e32 v4, v4, v5
+; SDAG-NEXT:    v_ffbh_u32_e32 v5, v0
+; SDAG-NEXT:    v_add_u32_e32 v5, 32, v5
+; SDAG-NEXT:    v_ffbh_u32_e32 v6, v1
+; SDAG-NEXT:    v_min_u32_e32 v5, v5, v6
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_add_u32_e32 v5, 64, v5
+; SDAG-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; SDAG-NEXT:    v_sub_u32_e32 v5, 0x80, v6
+; SDAG-NEXT:    v_sub_u32_e32 v4, 0x7f, v6
+; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v5
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr5
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr6
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB5_13
+; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v5
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB5_8
+; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v5
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB5_7
+; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    v_sub_u32_e32 v11, 0x66, v6
+; SDAG-NEXT:    v_sub_u32_e32 v9, 64, v11
+; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v11, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[9:10], v9, v[2:3]
+; SDAG-NEXT:    v_sub_u32_e32 v12, 38, v6
+; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
+; SDAG-NEXT:    v_or_b32_e32 v9, v7, v9
+; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v12, v[2:3]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; SDAG-NEXT:    v_add_u32_e32 v13, 26, v6
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
+; SDAG-NEXT:    v_lshrrev_b64 v[9:10], v12, v[0:1]
+; SDAG-NEXT:    v_lshlrev_b64 v[11:12], v13, v[2:3]
+; SDAG-NEXT:    v_subrev_u32_e32 v6, 38, v6
+; SDAG-NEXT:    v_cndmask_b32_e64 v14, v7, v0, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[6:7], v6, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
+; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
+; SDAG-NEXT:    v_or_b32_e32 v9, v11, v9
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v13, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
+; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
+; SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, v1, v3
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v7, v14, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, v7
+; SDAG-NEXT:    v_mov_b32_e32 v1, v8
+; SDAG-NEXT:  .LBB5_7: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:  .LBB5_8: ; %Flow2
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
+; SDAG-NEXT:    v_and_or_b32 v0, v2, 1, v0
+; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; SDAG-NEXT:    v_and_b32_e32 v2, 0x4000000, v0
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 2
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v4, v5
+; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB5_13: ; %Flow4
+; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v7
+; SDAG-NEXT:    v_lshl_or_b32 v0, v4, 23, v0
+; SDAG-NEXT:    v_add_u32_e32 v0, 1.0, v0
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v0
+; SDAG-NEXT:  .LBB5_14: ; %Flow5
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, v4
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: uitofp_i128_to_f16:
+; GISEL:       ; %bb.0: ; %itofp-entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
+; GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
+; GISEL-NEXT:    s_mov_b32 s4, 0
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v4, s4
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB5_14
+; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
+; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
+; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v6, v2
+; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_add_u32_e32 v6, 32, v6
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
+; GISEL-NEXT:    v_min_u32_e32 v5, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v7, 0x80, v5
+; GISEL-NEXT:    v_sub_u32_e32 v6, 0x7f, v5
+; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v7
+; GISEL-NEXT:    ; implicit-def: $vgpr4
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr7
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB5_13
+; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v7
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB5_8
+; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v7
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB5_7
+; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v4
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v4, v[0:1]
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
+; GISEL-NEXT:    v_subrev_u32_e32 v12, 64, v4
+; GISEL-NEXT:    v_or_b32_e32 v10, v8, v10
+; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v12, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v8, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v1, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v5, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v5
+; GISEL-NEXT:    v_or_b32_e32 v14, v8, v10
+; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
+; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v13, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v14, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v10, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v2, v8, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, v9, v3
+; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v10, v1, v3
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_or_b32_e32 v3, v12, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-NEXT:    v_mov_b32_e32 v1, v4
+; GISEL-NEXT:    v_mov_b32_e32 v2, v5
+; GISEL-NEXT:    v_mov_b32_e32 v3, v6
+; GISEL-NEXT:  .LBB5_7: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT:  .LBB5_8: ; %Flow2
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
+; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_and_b32_e32 v2, 0x4000000, v0
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v6, v7
+; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  .LBB5_13: ; %Flow4
+; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT:    v_lshl_add_u32 v0, v6, 23, 1.0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fffff
+; GISEL-NEXT:    v_and_or_b32 v0, v4, v1, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v0
+; GISEL-NEXT:  .LBB5_14: ; %Flow5
+; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    v_mov_b32_e32 v0, v4
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cvt = uitofp i128 %x to half
+  ret half %cvt
+}
+
+; FIXME: ExpandLargeFpConvert asserts on bfloat
+; define bfloat @sitofp_i128_to_bf16(i128 %x) {
+;   %cvt = sitofp i128 %x to bfloat
+;   ret bfloat %cvt
+; }
+
+; define bfloat @uitofp_i128_to_bf16(i128 %x) {
+;   %cvt = uitofp i128 %x to bfloat
+;   ret bfloat %cvt
+; }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AVR/bug-81911.ll b/llvm/test/CodeGen/AVR/bug-81911.ll
new file mode 100644
index 0000000..2a22666
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/bug-81911.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=avr -mcpu=atmega328 -O1 -verify-machineinstrs | FileCheck %s
+
+define internal i8 @main() {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %bb0
+; CHECK-NEXT:    push r2
+; CHECK-NEXT:    push r3
+; CHECK-NEXT:    push r4
+; CHECK-NEXT:    push r5
+; CHECK-NEXT:    push r6
+; CHECK-NEXT:    push r7
+; CHECK-NEXT:    push r8
+; CHECK-NEXT:    push r9
+; CHECK-NEXT:    push r10
+; CHECK-NEXT:    push r11
+; CHECK-NEXT:    push r12
+; CHECK-NEXT:    push r13
+; CHECK-NEXT:    push r14
+; CHECK-NEXT:    push r15
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    push r17
+; CHECK-NEXT:    push r28
+; CHECK-NEXT:    push r29
+; CHECK-NEXT:    in r28, 61
+; CHECK-NEXT:    in r29, 62
+; CHECK-NEXT:    sbiw r28, 13
+; CHECK-NEXT:    in r0, 63
+; CHECK-NEXT:    cli
+; CHECK-NEXT:    out 62, r29
+; CHECK-NEXT:    out 63, r0
+; CHECK-NEXT:    out 61, r28
+; CHECK-NEXT:    ldi r16, 0
+; CHECK-NEXT:    ldi r17, 0
+; CHECK-NEXT:    ldi r18, -1
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ldi r24, 123
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    std Y+1, r24 ; 1-byte Folded Spill
+; CHECK-NEXT:    movw r24, r28
+; CHECK-NEXT:    adiw r24, 6
+; CHECK-NEXT:    std Y+3, r25 ; 2-byte Folded Spill
+; CHECK-NEXT:    std Y+2, r24 ; 2-byte Folded Spill
+; CHECK-NEXT:    movw r8, r16
+; CHECK-NEXT:    movw r6, r16
+; CHECK-NEXT:    movw r4, r16
+; CHECK-NEXT:    movw r2, r16
+; CHECK-NEXT:    rjmp .LBB0_2
+; CHECK-NEXT:  .LBB0_1: ; %bb1
+; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    andi r30, 1
+; CHECK-NEXT:    ldd r31, Y+4 ; 1-byte Folded Reload
+; CHECK-NEXT:    dec r31
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    movw r8, r18
+; CHECK-NEXT:    movw r6, r20
+; CHECK-NEXT:    movw r4, r22
+; CHECK-NEXT:    movw r2, r24
+; CHECK-NEXT:    mov r18, r31
+; CHECK-NEXT:    brne .LBB0_2
+; CHECK-NEXT:    rjmp .LBB0_4
+; CHECK-NEXT:  .LBB0_2: ; %bb1
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    std Y+4, r18 ; 1-byte Folded Spill
+; CHECK-NEXT:    movw r18, r8
+; CHECK-NEXT:    movw r20, r6
+; CHECK-NEXT:    movw r22, r4
+; CHECK-NEXT:    movw r24, r2
+; CHECK-NEXT:    ldi r26, 10
+; CHECK-NEXT:    ldi r27, 0
+; CHECK-NEXT:    movw r10, r26
+; CHECK-NEXT:    movw r12, r16
+; CHECK-NEXT:    movw r14, r16
+; CHECK-NEXT:    call __udivdi3
+; CHECK-NEXT:    std Y+13, r25
+; CHECK-NEXT:    std Y+12, r24
+; CHECK-NEXT:    std Y+11, r23
+; CHECK-NEXT:    std Y+10, r22
+; CHECK-NEXT:    std Y+9, r21
+; CHECK-NEXT:    std Y+8, r20
+; CHECK-NEXT:    std Y+7, r19
+; CHECK-NEXT:    std Y+6, r18
+; CHECK-NEXT:    ldd r30, Y+2 ; 2-byte Folded Reload
+; CHECK-NEXT:    ldd r31, Y+3 ; 2-byte Folded Reload
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    ldi r30, 1
+; CHECK-NEXT:    cp r8, r1
+; CHECK-NEXT:    cpc r9, r1
+; CHECK-NEXT:    cpc r6, r16
+; CHECK-NEXT:    cpc r7, r17
+; CHECK-NEXT:    cpc r4, r16
+; CHECK-NEXT:    cpc r5, r17
+; CHECK-NEXT:    cpc r2, r16
+; CHECK-NEXT:    cpc r3, r17
+; CHECK-NEXT:    breq .LBB0_3
+; CHECK-NEXT:    rjmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: ; %bb1
+; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    mov r30, r1
+; CHECK-NEXT:    rjmp .LBB0_1
+; CHECK-NEXT:  .LBB0_4: ; %bb3
+; CHECK-NEXT:    ldd r24, Y+1 ; 1-byte Folded Reload
+; CHECK-NEXT:    std Y+5, r24
+; CHECK-NEXT:    movw r24, r28
+; CHECK-NEXT:    adiw r24, 5
+; CHECK-NEXT:    ;APP
+; CHECK-NEXT:    ;NO_APP
+; CHECK-NEXT:    ldd r24, Y+5
+; CHECK-NEXT:    adiw r28, 13
+; CHECK-NEXT:    in r0, 63
+; CHECK-NEXT:    cli
+; CHECK-NEXT:    out 62, r29
+; CHECK-NEXT:    out 63, r0
+; CHECK-NEXT:    out 61, r28
+; CHECK-NEXT:    pop r29
+; CHECK-NEXT:    pop r28
+; CHECK-NEXT:    pop r17
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    pop r15
+; CHECK-NEXT:    pop r14
+; CHECK-NEXT:    pop r13
+; CHECK-NEXT:    pop r12
+; CHECK-NEXT:    pop r11
+; CHECK-NEXT:    pop r10
+; CHECK-NEXT:    pop r9
+; CHECK-NEXT:    pop r8
+; CHECK-NEXT:    pop r7
+; CHECK-NEXT:    pop r6
+; CHECK-NEXT:    pop r5
+; CHECK-NEXT:    pop r4
+; CHECK-NEXT:    pop r3
+; CHECK-NEXT:    pop r2
+; CHECK-NEXT:    ret
+bb0:
+  %0 = alloca i64
+  %1 = alloca i8
+  %2 = tail call i8 asm sideeffect "ldi ${0}, 123", "=&r,~{sreg},~{memory}"()
+
+  br label %bb1
+
+bb1:
+  %3 = phi i64 [ %5, %bb1 ], [ 0, %bb0 ]
+  %4 = phi i8 [ %6, %bb1 ], [ 0, %bb0 ]
+  %5 = udiv i64 %3, 10
+  %6 = add i8 %4, 1
+
+  store i64 %5, ptr %0
+  call void asm sideeffect "", "r,~{memory}"(ptr %0)
+
+  %7 = icmp eq i64 %3, 0
+  %8 = icmp eq i8 %6, 0
+
+  br i1 %7, label %bb3, label %bb1
+
+bb3:
+  store i8 %2, ptr %1
+  call void asm sideeffect "", "r,~{memory}"(ptr %1)
+
+  %9 = load i8, ptr %1
+
+  ret i8 %9
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals.ll b/llvm/test/CodeGen/BPF/addr-space-globals.ll
index 878ba0d..73e80b7 100644
--- a/llvm/test/CodeGen/BPF/addr-space-globals.ll
+++ b/llvm/test/CodeGen/BPF/addr-space-globals.ll
@@ -18,7 +18,7 @@
 
 ; Verify that a,b,c reside in the same section
 
-; CHECK:     .section .arena.272,"aw",@progbits
+; CHECK:     .section .addr_space.272,"aw",@progbits
 ; CHECK-NOT: .section
 ; CHECK:     .globl  a
 ; CHECK:     .ascii  "\001\002"
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals2.ll b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
index d1e2318..5944cb2 100644
--- a/llvm/test/CodeGen/BPF/addr-space-globals2.ll
+++ b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
@@ -14,12 +14,12 @@
 
 ; Verify that a,b reside in separate sections
 
-; CHECK:     .section .arena.1,"aw",@progbits
+; CHECK:     .section .addr_space.1,"aw",@progbits
 ; CHECK-NOT: .section
 ; CHECK:     .globl  a
 ; CHECK:     .ascii  "\001\002"
 
-; CHECK:     .section .arena.2,"aw",@progbits
+; CHECK:     .section .addr_space.2,"aw",@progbits
 ; CHECK-NOT: .section
 ; CHECK:     .globl  b
 ; CHECK:     .ascii  "\003\004"
diff --git a/llvm/test/CodeGen/DirectX/any.ll b/llvm/test/CodeGen/DirectX/any.ll
new file mode 100644
index 0000000..e8d8707
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/any.ll
@@ -0,0 +1,113 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for any are generated for float and half.
+
+; CHECK-LABEL: any_bool
+; CHECK: icmp ne i1 %{{.*}}, false
+define noundef i1 @any_bool(i1 noundef %p0) {
+entry:
+  %p0.addr = alloca i8, align 1
+  %frombool = zext i1 %p0 to i8
+  store i8 %frombool, ptr %p0.addr, align 1
+  %0 = load i8, ptr %p0.addr, align 1
+  %tobool = trunc i8 %0 to i1
+  %dx.any = call i1 @llvm.dx.any.i1(i1 %tobool)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_int64_t
+; CHECK: icmp ne i64 %{{.*}}, 0
+define noundef i1 @any_int64_t(i64 noundef %p0) {
+entry:
+  %p0.addr = alloca i64, align 8
+  store i64 %p0, ptr %p0.addr, align 8
+  %0 = load i64, ptr %p0.addr, align 8
+  %dx.any = call i1 @llvm.dx.any.i64(i64 %0)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_int
+; CHECK: icmp ne i32 %{{.*}}, 0
+define noundef i1 @any_int(i32 noundef %p0) {
+entry:
+  %p0.addr = alloca i32, align 4
+  store i32 %p0, ptr %p0.addr, align 4
+  %0 = load i32, ptr %p0.addr, align 4
+  %dx.any = call i1 @llvm.dx.any.i32(i32 %0)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_int16_t
+; CHECK: icmp ne i16 %{{.*}}, 0
+define noundef i1 @any_int16_t(i16 noundef %p0) {
+entry:
+  %p0.addr = alloca i16, align 2
+  store i16 %p0, ptr %p0.addr, align 2
+  %0 = load i16, ptr %p0.addr, align 2
+  %dx.any = call i1 @llvm.dx.any.i16(i16 %0)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_double
+; CHECK: fcmp une double %{{.*}}, 0.000000e+00
+define noundef i1 @any_double(double noundef %p0) {
+entry:
+  %p0.addr = alloca double, align 8
+  store double %p0, ptr %p0.addr, align 8
+  %0 = load double, ptr %p0.addr, align 8
+  %dx.any = call i1 @llvm.dx.any.f64(double %0)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_float
+; CHECK: fcmp une float %{{.*}}, 0.000000e+00
+define noundef i1 @any_float(float noundef %p0) {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %dx.any = call i1 @llvm.dx.any.f32(float %0)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_half
+; CHECK: fcmp une half %{{.*}}, 0xH0000
+define noundef i1 @any_half(half noundef %p0) {
+entry:
+  %p0.addr = alloca half, align 2
+  store half %p0, ptr %p0.addr, align 2
+  %0 = load half, ptr %p0.addr, align 2
+  %dx.any = call i1 @llvm.dx.any.f16(half %0)
+  ret i1 %dx.any
+}
+
+; CHECK-LABEL: any_bool4
+; CHECK: icmp ne <4 x i1> %extractvec, zeroinitialize
+; CHECK: extractelement <4 x i1> %{{.*}}, i64 0
+; CHECK: extractelement <4 x i1> %{{.*}}, i64 1
+; CHECK: or i1  %{{.*}}, %{{.*}}
+; CHECK: extractelement <4 x i1> %{{.*}}, i64 2
+; CHECK: or i1  %{{.*}}, %{{.*}}
+; CHECK: extractelement <4 x i1> %{{.*}}, i64 3
+; CHECK: or i1  %{{.*}}, %{{.*}}
+define noundef i1 @any_bool4(<4 x i1> noundef %p0) {
+entry:
+  %p0.addr = alloca i8, align 1
+  %insertvec = shufflevector <4 x i1> %p0, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+  %0 = bitcast <8 x i1> %insertvec to i8
+  store i8 %0, ptr %p0.addr, align 1
+  %load_bits = load i8, ptr %p0.addr, align 1
+  %1 = bitcast i8 %load_bits to <8 x i1>
+  %extractvec = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %dx.any = call i1 @llvm.dx.any.v4i1(<4 x i1> %extractvec)
+  ret i1 %dx.any
+}
+
+declare i1 @llvm.dx.any.v4i1(<4 x i1>)
+declare i1 @llvm.dx.any.i1(i1)
+declare i1 @llvm.dx.any.i16(i16)
+declare i1 @llvm.dx.any.i32(i32)
+declare i1 @llvm.dx.any.i64(i64)
+declare i1 @llvm.dx.any.f16(half)
+declare i1 @llvm.dx.any.f32(float)
+declare i1 @llvm.dx.any.f64(double)
diff --git a/llvm/test/CodeGen/DirectX/exp-vec.ll b/llvm/test/CodeGen/DirectX/exp-vec.ll
new file mode 100644
index 0000000..c937155
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/exp-vec.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s
+
+; Make sure dxil operation function calls for exp are generated for float and half.
+
+; CHECK-LABEL: exp_float4
+; CHECK: fmul <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>,  %{{.*}}
+; CHECK: call <4 x float> @llvm.exp2.v4f32(<4 x float>  %{{.*}})
+define noundef <4 x float> @exp_float4(<4 x float> noundef %p0) {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %elt.exp = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+  ret <4 x float> %elt.exp
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/exp.ll b/llvm/test/CodeGen/DirectX/exp.ll
new file mode 100644
index 0000000..fdafc14
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/exp.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for exp are generated for float and half.
+
+; CHECK-LABEL: exp_float
+; CHECK: fmul float 0x3FF7154760000000, %{{.*}}
+; CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}})
+define noundef float @exp_float(float noundef %a) {
+entry:
+  %a.addr = alloca float, align 4
+  store float %a, ptr %a.addr, align 4
+  %0 = load float, ptr %a.addr, align 4
+  %elt.exp = call float @llvm.exp.f32(float %0)
+  ret float %elt.exp
+}
+
+; CHECK-LABEL: exp_half
+; CHECK: fmul half 0xH3DC5, %{{.*}}
+; CHECK: call half @dx.op.unary.f16(i32 21, half %{{.*}})
+; Function Attrs: noinline nounwind optnone
+define noundef half @exp_half(half noundef %a) {
+entry:
+  %a.addr = alloca half, align 2
+  store half %a, ptr %a.addr, align 2
+  %0 = load half, ptr %a.addr, align 2
+  %elt.exp = call half @llvm.exp.f16(half %0)
+  ret half %elt.exp
+}
+
+declare half @llvm.exp.f16(half)
+declare float @llvm.exp.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/lerp.ll b/llvm/test/CodeGen/DirectX/lerp.ll
new file mode 100644
index 0000000..ebd7e13
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/lerp.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for lerp are generated for float and half.
+
+; CHECK-LABEL: lerp_half
+; CHECK: fsub half %{{.*}}, %{{.*}}
+; CHECK: fmul half %{{.*}}, %{{.*}}
+; CHECK: fadd half %{{.*}}, %{{.*}}
+define noundef half @lerp_half(half noundef %p0) {
+entry:
+  %p0.addr = alloca half, align 2
+  store half %p0, ptr %p0.addr, align 2
+  %0 = load half, ptr %p0.addr, align 2
+  %1 = load half, ptr %p0.addr, align 2
+  %2 = load half, ptr %p0.addr, align 2
+  %dx.lerp = call half @llvm.dx.lerp.f16(half %0, half %1, half %2)
+  ret half %dx.lerp
+}
+
+; CHECK-LABEL: lerp_float
+; CHECK: fsub float %{{.*}}, %{{.*}}
+; CHECK: fmul float %{{.*}}, %{{.*}}
+; CHECK: fadd float %{{.*}}, %{{.*}}
+define noundef float @lerp_float(float noundef %p0, float noundef %p1) {
+entry:
+  %p1.addr = alloca float, align 4
+  %p0.addr = alloca float, align 4
+  store float %p1, ptr %p1.addr, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %1 = load float, ptr %p0.addr, align 4
+  %2 = load float, ptr %p0.addr, align 4
+  %dx.lerp = call float @llvm.dx.lerp.f32(float %0, float %1, float %2)
+  ret float %dx.lerp
+}
+
+; CHECK-LABEL: lerp_float4
+; CHECK: fsub <4 x float> %{{.*}}, %{{.*}}
+; CHECK: fmul <4 x float> %{{.*}}, %{{.*}}
+; CHECK: fadd <4 x float> %{{.*}}, %{{.*}}
+define noundef <4 x float> @lerp_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
+entry:
+  %p1.addr = alloca <4 x float>, align 16
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p1, ptr %p1.addr, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %1 = load <4 x float>, ptr %p0.addr, align 16
+  %2 = load <4 x float>, ptr %p0.addr, align 16
+  %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+  ret <4 x float> %dx.lerp
+}
+
+declare half @llvm.dx.lerp.f16(half, half, half)
+declare float @llvm.dx.lerp.f32(float, float, float)
+declare <4 x float> @llvm.dx.lerp.v4f32(<4 x float>, <4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/rcp.ll b/llvm/test/CodeGen/DirectX/rcp.ll
new file mode 100644
index 0000000..65abe83
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rcp.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for rcp are generated for float, double, and half.
+
+; CHECK-LABEL: rcp_float4
+; CHECK: fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+define noundef <4 x float> @rcp_float4(<4 x float> noundef %p0) {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32(<4 x float> %0)
+  ret <4 x float> %dx.rcp
+}
+
+; CHECK-LABEL: rcp_double4
+; CHECK: fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+define noundef <4 x double> @rcp_double4(<4 x double> noundef %p0) {
+entry:
+  %p0.addr = alloca <4 x double>, align 16
+  store <4 x double> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x double>, ptr %p0.addr, align 16
+  %dx.rcp = call <4 x double> @llvm.dx.rcp.v4f64(<4 x double> %0)
+  ret <4 x double> %dx.rcp
+}
+
+; CHECK-LABEL: rcp_half4
+; CHECK: fdiv <4 x half> <half  0xH3C00, half  0xH3C00, half  0xH3C00, half  0xH3C00>, %{{.*}} 
+define noundef <4 x half> @rcp_half4(<4 x half> noundef %p0) {
+entry:
+  %p0.addr = alloca <4 x half>, align 16
+  store <4 x half> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x half>, ptr %p0.addr, align 16
+  %dx.rcp = call <4 x half> @llvm.dx.rcp.v4f16(<4 x half> %0)
+  ret <4 x half> %dx.rcp
+}
+
+; CHECK-LABEL: rcp_half
+; CHECK: fdiv half 0xH3C00, %{{.*}} 
+define noundef half @rcp_half(half noundef %p0) {
+entry:
+  %p0.addr = alloca half, align 2
+  store half %p0, ptr %p0.addr, align 2
+  %0 = load half, ptr %p0.addr, align 2
+  %dx.rcp = call half @llvm.dx.rcp.f16(half %0)
+  ret half %dx.rcp
+}
+
+declare half @llvm.dx.rcp.f16(half)
+declare <4 x half> @llvm.dx.rcp.v4f16(<4 x half>)
+declare <4 x float> @llvm.dx.rcp.v4f32(<4 x float>)
+declare <4 x double> @llvm.dx.rcp.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/DirectX/rsqrt.ll b/llvm/test/CodeGen/DirectX/rsqrt.ll
new file mode 100644
index 0000000..52af0e6
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rsqrt.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for rsqrt are generated for float and half.
+
+; CHECK-LABEL: rsqrt_float
+; CHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
+define noundef float @rsqrt_float(float noundef %a) {
+entry:
+  %a.addr = alloca float, align 4
+  store float %a, ptr %a.addr, align 4
+  %0 = load float, ptr %a.addr, align 4
+  %dx.rsqrt = call float @llvm.dx.rsqrt.f32(float %0)
+  ret float %dx.rsqrt
+}
+
+; CHECK-LABEL: rsqrt_half
+; CHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
+define noundef half @rsqrt_half(half noundef %a) {
+entry:
+  %a.addr = alloca half, align 2
+  store half %a, ptr %a.addr, align 2
+  %0 = load half, ptr %a.addr, align 2
+  %dx.rsqrt = call half @llvm.dx.rsqrt.f16(half %0)
+  ret half %dx.rsqrt
+}
+
+declare half @llvm.dx.rsqrt.f16(half)
+declare float @llvm.dx.rsqrt.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/rsqrt_error.ll b/llvm/test/CodeGen/DirectX/rsqrt_error.ll
new file mode 100644
index 0000000..9cd5002
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rsqrt_error.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; DXIL operation rsqrt does not support double overload type
+; CHECK: LLVM ERROR: Invalid Overload Type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @rsqrt_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.rsqrt = call double @llvm.dx.rsqrt.f64(double %0)
+  ret double %dx.rsqrt
+}
diff --git a/llvm/test/CodeGen/Generic/ForceStackAlign.ll b/llvm/test/CodeGen/Generic/ForceStackAlign.ll
index 2c35ad3..7993b3e 100644
--- a/llvm/test/CodeGen/Generic/ForceStackAlign.ll
+++ b/llvm/test/CodeGen/Generic/ForceStackAlign.ll
@@ -8,7 +8,7 @@
 ; Stack realignment not supported.
 ; XFAIL: target=sparc{{.*}}
 
-; NVPTX cannot select dynamic_stackalloc
+; NVPTX can only select dynamic_stackalloc on sm_52+ and with ptx73+
 ; XFAIL: target=nvptx{{.*}}
 
 define i32 @f(ptr %p) nounwind {
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
new file mode 100644
index 0000000..9cc45fb
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
+; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %val) {
+; CHECK-LABEL: test(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<7>;
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [test_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_param_3];
+; CHECK-NEXT:    atom.add.noftz.f16 %rs2, [%r1], %rs1;
+; CHECK-NEXT:    ld.param.u32 %r2, [test_param_1];
+; CHECK-NEXT:    mov.b16 %rs3, 0x3C00;
+; CHECK-NEXT:    atom.add.noftz.f16 %rs4, [%r1], %rs3;
+; CHECK-NEXT:    ld.param.u32 %r3, [test_param_2];
+; CHECK-NEXT:    atom.global.add.noftz.f16 %rs5, [%r2], %rs1;
+; CHECK-NEXT:    atom.shared.add.noftz.f16 %rs6, [%r3], %rs1;
+; CHECK-NEXT:    ret;
+;
+; CHECK64-LABEL: test(
+; CHECK64:       {
+; CHECK64-NEXT:    .reg .b16 %rs<7>;
+; CHECK64-NEXT:    .reg .b64 %rd<4>;
+; CHECK64-EMPTY:
+; CHECK64-NEXT:  // %bb.0:
+; CHECK64-NEXT:    ld.param.u64 %rd1, [test_param_0];
+; CHECK64-NEXT:    ld.param.b16 %rs1, [test_param_3];
+; CHECK64-NEXT:    atom.add.noftz.f16 %rs2, [%rd1], %rs1;
+; CHECK64-NEXT:    ld.param.u64 %rd2, [test_param_1];
+; CHECK64-NEXT:    mov.b16 %rs3, 0x3C00;
+; CHECK64-NEXT:    atom.add.noftz.f16 %rs4, [%rd1], %rs3;
+; CHECK64-NEXT:    ld.param.u64 %rd3, [test_param_2];
+; CHECK64-NEXT:    atom.global.add.noftz.f16 %rs5, [%rd2], %rs1;
+; CHECK64-NEXT:    atom.shared.add.noftz.f16 %rs6, [%rd3], %rs1;
+; CHECK64-NEXT:    ret;
+;
+; CHECKPTX62-LABEL: test(
+; CHECKPTX62:       {
+; CHECKPTX62-NEXT:    .reg .pred %p<5>;
+; CHECKPTX62-NEXT:    .reg .b16 %rs<19>;
+; CHECKPTX62-NEXT:    .reg .b32 %r<58>;
+; CHECKPTX62-EMPTY:
+; CHECKPTX62-NEXT:  // %bb.0:
+; CHECKPTX62-NEXT:    ld.param.b16 %rs1, [test_param_3];
+; CHECKPTX62-NEXT:    ld.param.u32 %r23, [test_param_2];
+; CHECKPTX62-NEXT:    ld.param.u32 %r22, [test_param_1];
+; CHECKPTX62-NEXT:    ld.param.u32 %r24, [test_param_0];
+; CHECKPTX62-NEXT:    and.b32 %r1, %r24, -4;
+; CHECKPTX62-NEXT:    and.b32 %r25, %r24, 3;
+; CHECKPTX62-NEXT:    shl.b32 %r2, %r25, 3;
+; CHECKPTX62-NEXT:    mov.b32 %r26, 65535;
+; CHECKPTX62-NEXT:    shl.b32 %r27, %r26, %r2;
+; CHECKPTX62-NEXT:    not.b32 %r3, %r27;
+; CHECKPTX62-NEXT:    ld.u32 %r54, [%r1];
+; CHECKPTX62-NEXT:  $L__BB0_1: // %atomicrmw.start
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r28, %r54, %r2;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs2, %r28;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs4, %rs2, %rs1;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r29, %rs4;
+; CHECKPTX62-NEXT:    shl.b32 %r30, %r29, %r2;
+; CHECKPTX62-NEXT:    and.b32 %r31, %r54, %r3;
+; CHECKPTX62-NEXT:    or.b32 %r32, %r31, %r30;
+; CHECKPTX62-NEXT:    atom.cas.b32 %r6, [%r1], %r54, %r32;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p1, %r6, %r54;
+; CHECKPTX62-NEXT:    mov.u32 %r54, %r6;
+; CHECKPTX62-NEXT:    @%p1 bra $L__BB0_1;
+; CHECKPTX62-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECKPTX62-NEXT:    ld.u32 %r55, [%r1];
+; CHECKPTX62-NEXT:  $L__BB0_3: // %atomicrmw.start9
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r33, %r55, %r2;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs6, %r33;
+; CHECKPTX62-NEXT:    mov.b16 %rs8, 0x3C00;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs9, %rs6, %rs8;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r34, %rs9;
+; CHECKPTX62-NEXT:    shl.b32 %r35, %r34, %r2;
+; CHECKPTX62-NEXT:    and.b32 %r36, %r55, %r3;
+; CHECKPTX62-NEXT:    or.b32 %r37, %r36, %r35;
+; CHECKPTX62-NEXT:    atom.cas.b32 %r9, [%r1], %r55, %r37;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p2, %r9, %r55;
+; CHECKPTX62-NEXT:    mov.u32 %r55, %r9;
+; CHECKPTX62-NEXT:    @%p2 bra $L__BB0_3;
+; CHECKPTX62-NEXT:  // %bb.4: // %atomicrmw.end8
+; CHECKPTX62-NEXT:    and.b32 %r10, %r22, -4;
+; CHECKPTX62-NEXT:    shl.b32 %r38, %r22, 3;
+; CHECKPTX62-NEXT:    and.b32 %r11, %r38, 24;
+; CHECKPTX62-NEXT:    shl.b32 %r40, %r26, %r11;
+; CHECKPTX62-NEXT:    not.b32 %r12, %r40;
+; CHECKPTX62-NEXT:    ld.global.u32 %r56, [%r10];
+; CHECKPTX62-NEXT:  $L__BB0_5: // %atomicrmw.start27
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r41, %r56, %r11;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs11, %r41;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs13, %rs11, %rs1;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r42, %rs13;
+; CHECKPTX62-NEXT:    shl.b32 %r43, %r42, %r11;
+; CHECKPTX62-NEXT:    and.b32 %r44, %r56, %r12;
+; CHECKPTX62-NEXT:    or.b32 %r45, %r44, %r43;
+; CHECKPTX62-NEXT:    atom.global.cas.b32 %r15, [%r10], %r56, %r45;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p3, %r15, %r56;
+; CHECKPTX62-NEXT:    mov.u32 %r56, %r15;
+; CHECKPTX62-NEXT:    @%p3 bra $L__BB0_5;
+; CHECKPTX62-NEXT:  // %bb.6: // %atomicrmw.end26
+; CHECKPTX62-NEXT:    and.b32 %r16, %r23, -4;
+; CHECKPTX62-NEXT:    shl.b32 %r46, %r23, 3;
+; CHECKPTX62-NEXT:    and.b32 %r17, %r46, 24;
+; CHECKPTX62-NEXT:    shl.b32 %r48, %r26, %r17;
+; CHECKPTX62-NEXT:    not.b32 %r18, %r48;
+; CHECKPTX62-NEXT:    ld.shared.u32 %r57, [%r16];
+; CHECKPTX62-NEXT:  $L__BB0_7: // %atomicrmw.start45
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r49, %r57, %r17;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs15, %r49;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs17, %rs15, %rs1;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r50, %rs17;
+; CHECKPTX62-NEXT:    shl.b32 %r51, %r50, %r17;
+; CHECKPTX62-NEXT:    and.b32 %r52, %r57, %r18;
+; CHECKPTX62-NEXT:    or.b32 %r53, %r52, %r51;
+; CHECKPTX62-NEXT:    atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p4, %r21, %r57;
+; CHECKPTX62-NEXT:    mov.u32 %r57, %r21;
+; CHECKPTX62-NEXT:    @%p4 bra $L__BB0_7;
+; CHECKPTX62-NEXT:  // %bb.8: // %atomicrmw.end44
+; CHECKPTX62-NEXT:    ret;
+  %r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
+  %r2 = atomicrmw fadd ptr %dp0, half 1.0 seq_cst
+  %r3 = atomicrmw fadd ptr addrspace(1) %dp1, half %val seq_cst
+  %r4 = atomicrmw fadd ptr addrspace(3) %dp3, half %val seq_cst
+  ret void
+}
+
+attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index e99d0fd..6f2b5dc 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -175,6 +175,13 @@ define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
   ret float %ret
 }
 
+; CHECK-LABEL: atomicrmw_add_f16_generic
+define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
+; CHECK: atom.cas
+  %ret = atomicrmw fadd ptr %addr, half %val seq_cst
+  ret half %ret
+}
+
 ; CHECK-LABEL: atomicrmw_add_f32_addrspace1
 define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
 ; CHECK: atom.global.add.f32
diff --git a/llvm/test/CodeGen/NVPTX/common-linkage.ll b/llvm/test/CodeGen/NVPTX/common-linkage.ll
new file mode 100644
index 0000000..ac16d9a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/common-linkage.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+; CHECK: .common .global .align 4 .u32 g
+@g = common addrspace(1) global i32 0, align 4
+
+; CHECK: .weak .const .align 4 .u32 c
+@c = common addrspace(4) global i32 0, align 4
+
+; CHECK: .weak .shared .align 4 .u32 s
+@s = common addrspace(3) global i32 0, align 4
+
+define i32 @f1() {
+  %1 = load i32, ptr addrspace(1) @g
+  ret i32 %1
+}
+
+define i32 @f4() {
+  %1 = load i32, ptr addrspace(4) @c
+  ret i32 %1
+}
+
+define i32 @f3() {
+  %1 = load i32, ptr addrspace(3) @s
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index 3ef55ca..2db0c67 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -1,10 +1,44 @@
-; RUN: not llc -march=nvptx < %s 2>&1 | FileCheck %s
-; RUN: not llc -march=nvptx64 < %s 2>&1 | FileCheck %s
+; RUN: not llc < %s -march=nvptx -mattr=+ptx72 -mcpu=sm_52 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
+; RUN: not llc < %s -march=nvptx -mattr=+ptx73 -mcpu=sm_50 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
 
-; CHECK: in function test_dynamic_stackalloc{{.*}}: dynamic alloca unsupported by NVPTX backend
+; RUN: llc < %s -march=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK,CHECK-32
+; RUN: llc < %s -march=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK,CHECK-64
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
 
-define void @test_dynamic_stackalloc(i64 %n) {
-  %alloca = alloca i32, i64 %n
-  store volatile i32 0, ptr %alloca
-  ret void
+; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.
+
+; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) test_dynamic_stackalloc(
+; CHECK-NOT: __local_depot
+
+; CHECK-32:       ld.param.u32  %r[[SIZE:[0-9]]], [test_dynamic_stackalloc_param_0];
+; CHECK-32-NEXT:  mad.lo.s32 %r[[SIZE2:[0-9]]], %r[[SIZE]], 1, 7;
+; CHECK-32-NEXT:  and.b32         %r[[SIZE3:[0-9]]], %r[[SIZE2]], -8;
+; CHECK-32-NEXT:  alloca.u32  %r[[ALLOCA:[0-9]]], %r[[SIZE3]], 16;
+; CHECK-32-NEXT:  cvta.local.u32  %r[[ALLOCA]], %r[[ALLOCA]];
+; CHECK-32-NEXT:  { // callseq 0, 0
+; CHECK-32-NEXT:  .reg .b32 temp_param_reg;
+; CHECK-32-NEXT:  .param .b32 param0;
+; CHECK-32-NEXT:  st.param.b32  [param0+0], %r[[ALLOCA]];
+
+; CHECK-64:       ld.param.u64  %rd[[SIZE:[0-9]]], [test_dynamic_stackalloc_param_0];
+; CHECK-64-NEXT:  add.s64 %rd[[SIZE2:[0-9]]], %rd[[SIZE]], 7;
+; CHECK-64-NEXT:  and.b64 %rd[[SIZE3:[0-9]]], %rd[[SIZE2]], -8;
+; CHECK-64-NEXT:  alloca.u64  %rd[[ALLOCA:[0-9]]], %rd[[SIZE3]], 16;
+; CHECK-64-NEXT:  cvta.local.u64  %rd[[ALLOCA]], %rd[[ALLOCA]];
+; CHECK-64-NEXT:  { // callseq 0, 0
+; CHECK-64-NEXT:  .reg .b32 temp_param_reg;
+; CHECK-64-NEXT:  .param .b64 param0;
+; CHECK-64-NEXT:  st.param.b64  [param0+0], %rd[[ALLOCA]];
+
+; CHECK-NEXT:     .param .b32 retval0;
+; CHECK-NEXT:     call.uni (retval0),
+; CHECK-NEXT:     bar,
+
+define i32 @test_dynamic_stackalloc(i64 %n) {
+  %alloca = alloca i8, i64 %n, align 16
+  %call = call i32 @bar(ptr %alloca)
+  ret i32 %call
 }
+
+declare i32 @bar(ptr)
+\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/weak-global.ll b/llvm/test/CodeGen/NVPTX/weak-global.ll
index dd0160d..781ecb9 100644
--- a/llvm/test/CodeGen/NVPTX/weak-global.ll
+++ b/llvm/test/CodeGen/NVPTX/weak-global.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
-; CHECK: .weak .global .align 4 .u32 g
+; CHECK: .common .global .align 4 .u32 g
 @g = common addrspace(1) global i32 zeroinitializer
 
 define i32 @func0() {
diff --git a/llvm/test/CodeGen/PowerPC/aix-codemodel-attr.ll b/llvm/test/CodeGen/PowerPC/aix-codemodel-attr.ll
new file mode 100644
index 0000000..ef1156e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-codemodel-attr.ll
@@ -0,0 +1,166 @@
+; RUN: llc --verify-machineinstrs -mtriple powerpc-ibm-aix --code-model=small < \
+; RUN: %s | FileCheck --check-prefixes=CHECK,CHECK32,CHECK-SMALL,CHECK-SMALL32 %s
+
+; RUN: llc --verify-machineinstrs -mtriple powerpc-ibm-aix --code-model=large < \
+; RUN: %s | FileCheck --check-prefixes=CHECK,CHECK32,CHECK-LARGE,CHECK-LARGE32 %s
+
+; RUN: llc --verify-machineinstrs -mtriple powerpc64-ibm-aix --code-model=small < \
+; RUN: %s | FileCheck --check-prefixes=CHECK,CHECK64,CHECK-SMALL,CHECK-SMALL64 %s
+
+; RUN: llc --verify-machineinstrs -mtriple powerpc64-ibm-aix --code-model=large < \
+; RUN: %s | FileCheck --check-prefixes=CHECK,CHECK64,CHECK-LARGE,CHECK-LARGE64 %s
+
+@a = external dso_local global i32, code_model "small", align 4
+@b = external dso_local global i32, code_model "large", align 4
+@c = dso_local global i32 55, code_model "small", align 4
+@d = dso_local global i32 41, code_model "large", align 4
+@e = external dso_local global i32, align 4
+@f = dso_local global i32 2748, align 4
+
+@large_aliasee = global i32 10, code_model "large", align 4
+@small_aliasee = global i32 171, code_model "small", align 4
+@normal_aliasee = global i32 2748, align 4
+
+@al = alias i32, ptr @large_aliasee
+@as = alias i32, ptr @small_aliasee
+@an = alias i32, ptr @normal_aliasee
+
+define i32 @A() local_unnamed_addr {
+entry:
+  %0 = load i32, ptr @a, align 4
+  ret i32 %0
+}
+; CHECK32:  lwz [[SCRATCH:[0-9]+]], L..C[[TL_A:[0-9]+]](2)                         # @a
+; CHECK64:  ld [[SCRATCH:[0-9]+]], L..C[[TL_A:[0-9]+]](2)                         # @a
+; CHECK:    lwz 3, 0([[SCRATCH]])
+; CHECK:    blr
+
+define i32 @B() local_unnamed_addr {
+entry:
+  %0 = load i32, ptr @b, align 4
+  ret i32 %0
+}
+; CHECK:   addis [[HI:[0-9]+]], L..C[[TL_B:[0-9]+]]@u(2)
+; CHECK32: lwz [[ADDR:[0-9]+]], L..C[[TL_B]]@l([[HI]])
+; CHECK64: ld [[ADDR:[0-9]+]], L..C[[TL_B]]@l([[HI]])
+; CHECK:   lwz 3, 0([[ADDR]])
+; CHECK:   blr
+
+define i32 @C() local_unnamed_addr {
+entry:
+  %0 = load i32, ptr @c, align 4
+  ret i32 %0
+}
+; CHECK32:  lwz [[SCRATCH:[0-9]+]], L..C[[TL_C:[0-9]+]](2)                         # @c
+; CHECK64:  ld [[SCRATCH:[0-9]+]], L..C[[TL_C:[0-9]+]](2)                         # @c
+; CHECK:    lwz 3, 0([[SCRATCH]])
+; CHECK:    blr
+
+define i32 @D() local_unnamed_addr {
+entry:
+  %0 = load i32, ptr @d, align 4
+  ret i32 %0
+}
+; CHECK: addis [[HI:[0-9]+]], L..C[[TL_D:[0-9]+]]@u(2)
+; CHECK32: lwz [[ADDR:[0-9]+]], L..C[[TL_D]]@l([[HI]])
+; CHECK64: ld [[ADDR:[0-9]+]], L..C[[TL_D]]@l([[HI]])
+; CHECK: lwz 3, 0([[ADDR]])
+; CHECK: blr
+
+define i32 @E() {
+entry:
+  %0 = load i32, ptr @e, align 4
+  ret i32 %0
+}
+; CHECK-LARGE: addis [[HI:[0-9]+]], L..C[[TL_E:[0-9]+]]@u(2)
+; CHECK-LARGE32: lwz [[SCRATCH:[0-9]+]], L..C[[TL_E]]@l([[HI]])
+; CHECK-SMALL32: lwz [[SCRATCH:[0-9]+]], L..C[[TL_E:[0-9]+]](2)
+; CHECK-LARGE64: ld  [[SCRATCH:[0-9]+]], L..C[[TL_E]]@l([[HI]])
+; CHECK-SMALL64: ld  [[SCRATCH:[0-9]+]], L..C[[TL_E:[0-9]+]](2)
+; CHECK: lwz 3, 0([[SCRATCH]])
+; CHECK: blr
+
+define i32 @F() {
+entry:
+  %0 = load i32, ptr @f, align 4
+  ret i32 %0
+}
+; CHECK-LARGE: addis [[HI:[0-9]+]], L..C[[TL_F:[0-9]+]]@u(2)
+; CHECK-LARGE32: lwz [[SCRATCH:[0-9]+]], L..C[[TL_F]]@l([[HI]])
+; CHECK-SMALL32: lwz [[SCRATCH:[0-9]+]], L..C[[TL_F:[0-9]+]](2)
+; CHECK-LARGE64: ld [[SCRATCH:[0-9]+]], L..C[[TL_F]]@l([[HI]])
+; CHECK-SMALL64: ld  [[SCRATCH:[0-9]+]], L..C[[TL_F:[0-9]+]](2)
+; CHECK: lwz 3, 0([[SCRATCH]])
+; CHECK: blr
+
+define noundef nonnull ptr @addr_a() local_unnamed_addr {
+entry:
+  ret ptr @a
+}
+; CHECK32:  lwz 3, L..C[[TL_A]](2)                         # @a
+; CHECK64:  ld 3, L..C[[TL_A]](2)                         # @a
+; CHECK:    blr
+
+define noundef nonnull ptr @addr_b() local_unnamed_addr {
+entry:
+  ret ptr @b
+}
+; CHECK:    addis [[HI:[0-9]+]], L..C[[TL_B]]@u(2)
+; CHECK32:  lwz 3, L..C[[TL_B]]@l([[HI]])
+; CHECK64:  ld 3, L..C[[TL_B]]@l([[HI]])
+; CHECK:    blr
+
+
+define noundef nonnull ptr @addr_c() local_unnamed_addr {
+entry:
+  ret ptr @c
+}
+; CHECK32:  lwz 3, L..C[[TL_C]](2)                         # @c
+; CHECK64:  ld 3, L..C[[TL_C]](2)                         # @c
+; CHECK:    blr
+
+define noundef nonnull ptr @addr_d() local_unnamed_addr {
+entry:
+  ret ptr @d
+}
+; CHECK:   addis [[HI:[0-9]+]], L..C[[TL_D]]@u(2)
+; CHECK32: lwz 3, L..C[[TL_D]]@l([[HI]])
+; CHECK64: ld 3, L..C[[TL_D]]@l([[HI]])
+; CHECK:   blr
+
+define i32 @G() {
+   %tmp = load i32, ptr @al
+   ret i32 %tmp
+}
+; CHECK:   addis [[HI:[0-9]+]], L..C[[TL_AL:[0-9]+]]@u(2)
+; CHECK32: lwz [[ADDR:[0-9]+]], L..C[[TL_AL]]@l([[HI]])
+; CHECK64: ld  [[ADDR:[0-9]+]], L..C[[TL_AL]]@l([[HI]])
+; CHECK:   lwz 3, 0([[ADDR]])
+
+define i32 @H() {
+   %tmp = load i32, ptr @as
+   ret i32 %tmp
+}
+; CHECK32: lwz [[ADDR:[0-9]+]], L..C[[TL_AS:[0-9]+]](2)
+; CHECK64: ld [[ADDR:[0-9]+]], L..C[[TL_AS:[0-9]+]](2)
+; CHECK:   lwz 3, 0([[ADDR]])
+
+;; Check TOC entires have correct storage mapping class
+; CHECK:         L..C[[TL_A]]:
+; CHECK:           .tc a[TC],a[UA]
+; CHECK:         L..C[[TL_B]]:
+; CHECK:           .tc b[TE],b[UA]
+; CHECK:         L..C[[TL_C]]:
+; CHECK:           .tc c[TC],c[RW]
+; CHECK:         L..C[[TL_D]]:
+; CHECK:           .tc d[TE],d[RW]
+; CHECK:         L..C[[TL_E]]:
+; CHECK-SMALL:     .tc e[TC],e[UA]
+; CHECK-LARGE:     .tc e[TE],e[UA]
+; CHECK:         L..C[[TL_F]]:
+; CHECK-SMALL:     .tc f[TC],f[RW]
+; CHECK-LARGE:     .tc f[TE],f[RW]
+; CHECK:         L..C[[TL_AL]]:
+; CHECK:           .tc al[TE],al
+; CHECK:         L..C[[TL_AS]]:
+; CHECK:           .tc as[TC],as
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
index c1d1461..50ebe04 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple powerpc64le < %s | FileCheck %s
 
 ; Check constrained ops converted to call
-define void @test(ptr %cast) {
+define void @test(ptr %cast) strictfp {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %root
 ; CHECK-NEXT:    mflr 0
@@ -51,7 +51,7 @@ for.body:
 }
 
 ; Check constrained ops converted to native instruction
-define void @test2(ptr %cast) {
+define void @test2(ptr %cast) strictfp {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 255
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index cfdefec..ebf232c 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -740,9 +740,9 @@ define i8 @test_reassoc_minu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
 ; CHECK-LABEL: test_reassoc_minu_i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a3, 255
-; CHECK-NEXT:    andi a2, a2, 255
 ; CHECK-NEXT:    andi a1, a1, 255
 ; CHECK-NEXT:    andi a0, a0, 255
+; CHECK-NEXT:    andi a2, a2, 255
 ; CHECK-NEXT:    minu a0, a0, a1
 ; CHECK-NEXT:    minu a1, a2, a3
 ; CHECK-NEXT:    minu a0, a0, a1
@@ -757,9 +757,9 @@ define i16 @test_reassoc_minu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
 ; CHECK-LABEL: test_reassoc_minu_i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    zext.h a3, a3
-; CHECK-NEXT:    zext.h a2, a2
 ; CHECK-NEXT:    zext.h a1, a1
 ; CHECK-NEXT:    zext.h a0, a0
+; CHECK-NEXT:    zext.h a2, a2
 ; CHECK-NEXT:    minu a0, a0, a1
 ; CHECK-NEXT:    minu a1, a2, a3
 ; CHECK-NEXT:    minu a0, a0, a1
@@ -774,9 +774,9 @@ define i32 @test_reassoc_minu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: test_reassoc_minu_i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.w a3, a3
-; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    sext.w a1, a1
 ; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    minu a0, a0, a1
 ; CHECK-NEXT:    minu a1, a2, a3
 ; CHECK-NEXT:    minu a0, a0, a1
@@ -804,9 +804,9 @@ define i8 @test_reassoc_min_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
 ; CHECK-LABEL: test_reassoc_min_i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.b a3, a3
-; CHECK-NEXT:    sext.b a2, a2
 ; CHECK-NEXT:    sext.b a1, a1
 ; CHECK-NEXT:    sext.b a0, a0
+; CHECK-NEXT:    sext.b a2, a2
 ; CHECK-NEXT:    min a0, a0, a1
 ; CHECK-NEXT:    min a1, a2, a3
 ; CHECK-NEXT:    min a0, a0, a1
@@ -821,9 +821,9 @@ define i16 @test_reassoc_min_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
 ; CHECK-LABEL: test_reassoc_min_i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.h a3, a3
-; CHECK-NEXT:    sext.h a2, a2
 ; CHECK-NEXT:    sext.h a1, a1
 ; CHECK-NEXT:    sext.h a0, a0
+; CHECK-NEXT:    sext.h a2, a2
 ; CHECK-NEXT:    min a0, a0, a1
 ; CHECK-NEXT:    min a1, a2, a3
 ; CHECK-NEXT:    min a0, a0, a1
@@ -838,9 +838,9 @@ define i32 @test_reassoc_min_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: test_reassoc_min_i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.w a3, a3
-; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    sext.w a1, a1
 ; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    min a0, a0, a1
 ; CHECK-NEXT:    min a1, a2, a3
 ; CHECK-NEXT:    min a0, a0, a1
@@ -868,9 +868,9 @@ define i8 @test_reassoc_maxu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
 ; CHECK-LABEL: test_reassoc_maxu_i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a3, 255
-; CHECK-NEXT:    andi a2, a2, 255
 ; CHECK-NEXT:    andi a1, a1, 255
 ; CHECK-NEXT:    andi a0, a0, 255
+; CHECK-NEXT:    andi a2, a2, 255
 ; CHECK-NEXT:    maxu a0, a0, a1
 ; CHECK-NEXT:    maxu a1, a2, a3
 ; CHECK-NEXT:    maxu a0, a0, a1
@@ -885,9 +885,9 @@ define i16 @test_reassoc_maxu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
 ; CHECK-LABEL: test_reassoc_maxu_i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    zext.h a3, a3
-; CHECK-NEXT:    zext.h a2, a2
 ; CHECK-NEXT:    zext.h a1, a1
 ; CHECK-NEXT:    zext.h a0, a0
+; CHECK-NEXT:    zext.h a2, a2
 ; CHECK-NEXT:    maxu a0, a0, a1
 ; CHECK-NEXT:    maxu a1, a2, a3
 ; CHECK-NEXT:    maxu a0, a0, a1
@@ -902,9 +902,9 @@ define i32 @test_reassoc_maxu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: test_reassoc_maxu_i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.w a3, a3
-; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    sext.w a1, a1
 ; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    maxu a0, a0, a1
 ; CHECK-NEXT:    maxu a1, a2, a3
 ; CHECK-NEXT:    maxu a0, a0, a1
@@ -932,9 +932,9 @@ define i8 @test_reassoc_max_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
 ; CHECK-LABEL: test_reassoc_max_i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.b a3, a3
-; CHECK-NEXT:    sext.b a2, a2
 ; CHECK-NEXT:    sext.b a1, a1
 ; CHECK-NEXT:    sext.b a0, a0
+; CHECK-NEXT:    sext.b a2, a2
 ; CHECK-NEXT:    max a0, a0, a1
 ; CHECK-NEXT:    max a1, a2, a3
 ; CHECK-NEXT:    max a0, a0, a1
@@ -949,9 +949,9 @@ define i16 @test_reassoc_max_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
 ; CHECK-LABEL: test_reassoc_max_i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.h a3, a3
-; CHECK-NEXT:    sext.h a2, a2
 ; CHECK-NEXT:    sext.h a1, a1
 ; CHECK-NEXT:    sext.h a0, a0
+; CHECK-NEXT:    sext.h a2, a2
 ; CHECK-NEXT:    max a0, a0, a1
 ; CHECK-NEXT:    max a1, a2, a3
 ; CHECK-NEXT:    max a0, a0, a1
@@ -966,9 +966,9 @@ define i32 @test_reassoc_max_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: test_reassoc_max_i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sext.w a3, a3
-; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    sext.w a1, a1
 ; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    sext.w a2, a2
 ; CHECK-NEXT:    max a0, a0, a1
 ; CHECK-NEXT:    max a1, a2, a3
 ; CHECK-NEXT:    max a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 68740ee..7dcfb24 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1599,15 +1599,16 @@ define float @vreduce_fminimum_v2f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v9, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v9
+; CHECK-NEXT:    beqz a0, .LBB99_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB99_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %x
@@ -1619,15 +1620,8 @@ define float @vreduce_fminimum_v2f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v2f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %x
@@ -1641,24 +1635,16 @@ define float @vreduce_fminimum_v4f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v9, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v9
+; CHECK-NEXT:    beqz a0, .LBB101_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB101_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %x
@@ -1670,24 +1656,8 @@ define float @vreduce_fminimum_v4f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v4f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %x
@@ -1701,33 +1671,16 @@ define float @vreduce_fminimum_v8f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v10, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    beqz a0, .LBB103_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB103_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x float>, ptr %x
@@ -1739,33 +1692,8 @@ define float @vreduce_fminimum_v8f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v8f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x float>, ptr %x
@@ -1779,42 +1707,16 @@ define float @vreduce_fminimum_v16f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v12, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    beqz a0, .LBB105_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB105_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x float>, ptr %x
@@ -1826,42 +1728,8 @@ define float @vreduce_fminimum_v16f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v16f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x float>, ptr %x
@@ -1876,51 +1744,16 @@ define float @vreduce_fminimum_v32f32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB107_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB107_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x float>, ptr %x
@@ -1933,51 +1766,8 @@ define float @vreduce_fminimum_v32f32_nonans(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x float>, ptr %x
@@ -2009,52 +1799,18 @@ define float @vreduce_fminimum_v64f32(ptr %x) {
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB109_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    j .LBB109_3
+; CHECK-NEXT:  .LBB109_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB109_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -2073,51 +1829,8 @@ define float @vreduce_fminimum_v64f32_nonans(ptr %x) {
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <64 x float>, ptr %x
@@ -2208,52 +1921,18 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB111_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    j .LBB111_3
+; CHECK-NEXT:  .LBB111_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB111_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    mv a1, a0
@@ -2281,51 +1960,8 @@ define float @vreduce_fminimum_v128f32_nonans(ptr %x) {
 ; CHECK-NEXT:    vle32.v v0, (a1)
 ; CHECK-NEXT:    vfmin.vv v16, v24, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v0
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <128 x float>, ptr %x
@@ -2339,15 +1975,16 @@ define double @vreduce_fminimum_v2f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v9, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v9
+; CHECK-NEXT:    beqz a0, .LBB113_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI113_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI113_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB113_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x double>, ptr %x
@@ -2359,15 +1996,8 @@ define double @vreduce_fminimum_v2f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v2f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x double>, ptr %x
@@ -2381,24 +2011,16 @@ define double @vreduce_fminimum_v4f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v10, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    beqz a0, .LBB115_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI115_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI115_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB115_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, ptr %x
@@ -2410,24 +2032,8 @@ define double @vreduce_fminimum_v4f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v4f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, ptr %x
@@ -2441,33 +2047,16 @@ define double @vreduce_fminimum_v8f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v12, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    beqz a0, .LBB117_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI117_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI117_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB117_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x double>, ptr %x
@@ -2479,33 +2068,8 @@ define double @vreduce_fminimum_v8f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v8f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x double>, ptr %x
@@ -2519,42 +2083,16 @@ define double @vreduce_fminimum_v16f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v16f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB119_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI119_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI119_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB119_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x double>, ptr %x
@@ -2566,42 +2104,8 @@ define double @vreduce_fminimum_v16f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fminimum_v16f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x double>, ptr %x
@@ -2632,43 +2136,18 @@ define double @vreduce_fminimum_v32f64(ptr %x) {
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB121_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI121_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI121_0)(a0)
+; CHECK-NEXT:    j .LBB121_3
+; CHECK-NEXT:  .LBB121_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB121_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -2686,42 +2165,8 @@ define double @vreduce_fminimum_v32f64_nonans(ptr %x) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x double>, ptr %x
@@ -2811,43 +2256,18 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB123_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI123_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI123_0)(a0)
+; CHECK-NEXT:    j .LBB123_3
+; CHECK-NEXT:  .LBB123_2:
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB123_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    mv a1, a0
@@ -2874,42 +2294,8 @@ define double @vreduce_fminimum_v64f64_nonans(ptr %x) {
 ; CHECK-NEXT:    vle64.v v0, (a1)
 ; CHECK-NEXT:    vfmin.vv v16, v24, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v0
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmin.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <64 x double>, ptr %x
@@ -2923,15 +2309,16 @@ define float @vreduce_fmaximum_v2f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v9, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v9
+; CHECK-NEXT:    beqz a0, .LBB125_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB125_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %x
@@ -2943,15 +2330,8 @@ define float @vreduce_fmaximum_v2f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v2f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %x
@@ -2965,24 +2345,16 @@ define float @vreduce_fmaximum_v4f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v9, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v9
+; CHECK-NEXT:    beqz a0, .LBB127_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB127_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %x
@@ -2994,24 +2366,8 @@ define float @vreduce_fmaximum_v4f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v4f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %x
@@ -3025,33 +2381,16 @@ define float @vreduce_fmaximum_v8f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v10, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    beqz a0, .LBB129_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB129_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x float>, ptr %x
@@ -3063,33 +2402,8 @@ define float @vreduce_fmaximum_v8f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v8f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x float>, ptr %x
@@ -3103,42 +2417,16 @@ define float @vreduce_fmaximum_v16f32(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v12, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    beqz a0, .LBB131_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB131_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x float>, ptr %x
@@ -3150,42 +2438,8 @@ define float @vreduce_fmaximum_v16f32_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v16f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x float>, ptr %x
@@ -3200,51 +2454,16 @@ define float @vreduce_fmaximum_v32f32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB133_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB133_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x float>, ptr %x
@@ -3257,51 +2476,8 @@ define float @vreduce_fmaximum_v32f32_nonans(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x float>, ptr %x
@@ -3333,52 +2509,18 @@ define float @vreduce_fmaximum_v64f32(ptr %x) {
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB135_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    j .LBB135_3
+; CHECK-NEXT:  .LBB135_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB135_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -3397,51 +2539,8 @@ define float @vreduce_fmaximum_v64f32_nonans(ptr %x) {
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <64 x float>, ptr %x
@@ -3532,52 +2631,18 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB137_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa0, a0
+; CHECK-NEXT:    j .LBB137_3
+; CHECK-NEXT:  .LBB137_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB137_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    mv a1, a0
@@ -3605,51 +2670,8 @@ define float @vreduce_fmaximum_v128f32_nonans(ptr %x) {
 ; CHECK-NEXT:    vle32.v v0, (a1)
 ; CHECK-NEXT:    vfmax.vv v16, v24, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v0
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 8
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 4
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v11, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <128 x float>, ptr %x
@@ -3663,15 +2685,16 @@ define double @vreduce_fmaximum_v2f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v9, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v9
+; CHECK-NEXT:    beqz a0, .LBB139_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI139_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI139_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB139_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x double>, ptr %x
@@ -3683,15 +2706,8 @@ define double @vreduce_fmaximum_v2f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v2f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <2 x double>, ptr %x
@@ -3705,24 +2721,16 @@ define double @vreduce_fmaximum_v4f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v10, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    beqz a0, .LBB141_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI141_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI141_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB141_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, ptr %x
@@ -3734,24 +2742,8 @@ define double @vreduce_fmaximum_v4f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v4f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v10, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, ptr %x
@@ -3765,33 +2757,16 @@ define double @vreduce_fmaximum_v8f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v12, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    beqz a0, .LBB143_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI143_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI143_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB143_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x double>, ptr %x
@@ -3803,33 +2778,8 @@ define double @vreduce_fmaximum_v8f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v8f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <8 x double>, ptr %x
@@ -3843,42 +2793,16 @@ define double @vreduce_fmaximum_v16f64(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v16f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB145_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI145_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI145_0)(a0)
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB145_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x double>, ptr %x
@@ -3890,42 +2814,8 @@ define double @vreduce_fmaximum_v16f64_nonans(ptr %x) {
 ; CHECK-LABEL: vreduce_fmaximum_v16f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <16 x double>, ptr %x
@@ -3956,43 +2846,18 @@ define double @vreduce_fmaximum_v32f64(ptr %x) {
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB147_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI147_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI147_0)(a0)
+; CHECK-NEXT:    j .LBB147_3
+; CHECK-NEXT:  .LBB147_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB147_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -4010,42 +2875,8 @@ define double @vreduce_fmaximum_v32f64_nonans(ptr %x) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x double>, ptr %x
@@ -4135,43 +2966,18 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vmfne.vv v16, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    beqz a0, .LBB149_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, %hi(.LCPI149_0)
+; CHECK-NEXT:    fld fa0, %lo(.LCPI149_0)(a0)
+; CHECK-NEXT:    j .LBB149_3
+; CHECK-NEXT:  .LBB149_2:
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:  .LBB149_3:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    mv a1, a0
@@ -4198,42 +3004,8 @@ define double @vreduce_fmaximum_v64f64_nonans(ptr %x) {
 ; CHECK-NEXT:    vle64.v v0, (a1)
 ; CHECK-NEXT:    vfmax.vv v16, v24, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v0
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v24, v16, 8
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v12, 4
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v10, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10
-; CHECK-NEXT:    vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vfmax.vv v9, v9, v8
-; CHECK-NEXT:    vslidedown.vi v10, v9, 1
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vmfeq.vv v8, v9, v9
-; CHECK-NEXT:    vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT:    vfmax.vv v8, v11, v8
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <64 x double>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
index 8c0d97a..f1ae320 100644
--- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
@@ -89,8 +89,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
 ; RISCV32-NEXT:    snez a3, a3
 ; RISCV32-NEXT:    and a3, a3, a7
 ; RISCV32-NEXT:    or a2, a3, a2
-; RISCV32-NEXT:    or a3, t2, t3
-; RISCV32-NEXT:    or a2, a2, a3
+; RISCV32-NEXT:    or a2, a2, t2
+; RISCV32-NEXT:    or a2, a2, t3
 ; RISCV32-NEXT:    mul a3, a5, a4
 ; RISCV32-NEXT:    andi a2, a2, 1
 ; RISCV32-NEXT:    sw a3, 0(a0)
diff --git a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll
index a30d079..18752fd 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-load.ll
@@ -9,7 +9,7 @@
 ; CHECK-DAG: %[[#TYLONGPTR:]] = OpTypePointer Function %[[#TYLONG]]
 ; CHECK: %[[#PTRTOSTRUCT:]] = OpFunctionParameter %[[#TYSTRUCTPTR]]
 ; CHECK: %[[#PTRTOLONG:]] = OpBitcast %[[#TYLONGPTR]] %[[#PTRTOSTRUCT]]
-; CHECK: OpLoad %[[#TYLONG]] %[[#PTRTOLONG]]
+; CHECK-NEXT: OpLoad %[[#TYLONG]] %[[#PTRTOLONG]]
 
 %struct.S = type { i32 }
 %struct.__wrapper_class = type { [7 x %struct.S] }
diff --git a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-store.ll b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-store.ll
index 4701f02..202bcfb 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-store.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-store.ll
@@ -13,7 +13,7 @@
 ; CHECK: %[[#OBJ:]] = OpFunctionParameter %[[#TYSTRUCT]]
 ; CHECK: %[[#ARGPTR2:]] = OpFunctionParameter %[[#TYLONGPTR]]
 ; CHECK: %[[#PTRTOSTRUCT:]] = OpBitcast %[[#TYSTRUCTPTR]] %[[#ARGPTR2]]
-; CHECK: OpStore %[[#PTRTOSTRUCT]] %[[#OBJ]]
+; CHECK-NEXT: OpStore %[[#PTRTOSTRUCT]] %[[#OBJ]]
 
 %struct.S = type { i32 }
 %struct.__wrapper_class = type { [7 x %struct.S] }
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-args-rev.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-args-rev.ll
new file mode 100644
index 0000000..ae7fb99
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-args-rev.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: OpName %[[FooArg:.*]] "known_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Foo:.*]] "foo"
+; CHECK-SPIRV-DAG: OpName %[[ArgToDeduce:.*]] "unknown_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Bar:.*]] "bar"
+; CHECK-SPIRV-DAG: %[[Long:.*]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[Void:.*]] = OpTypeVoid
+; CHECK-SPIRV-DAG: %[[LongPtr:.*]] = OpTypePointer CrossWorkgroup %[[Long]]
+; CHECK-SPIRV-DAG: %[[Fun:.*]] = OpTypeFunction %[[Void]] %[[LongPtr]]
+; CHECK-SPIRV: %[[Bar]] = OpFunction %[[Void]] None %[[Fun]]
+; CHECK-SPIRV: %[[ArgToDeduce]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: OpFunctionCall %[[Void]] %[[Foo]] %[[ArgToDeduce]]
+; CHECK-SPIRV: %[[Foo]] = OpFunction %[[Void]] None %[[Fun]]
+; CHECK-SPIRV: %[[FooArg]] = OpFunctionParameter %[[LongPtr]]
+
+define spir_kernel void @bar(ptr addrspace(1) %unknown_type_ptr) {
+entry:
+  %elem = getelementptr inbounds i32, ptr addrspace(1) %unknown_type_ptr, i64 0
+  call void @foo(ptr addrspace(1) %unknown_type_ptr)
+  ret void
+}
+
+define void @foo(ptr addrspace(1) %known_type_ptr) {
+entry:
+  ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-args.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-args.ll
new file mode 100644
index 0000000..ee411f2
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-args.ll
@@ -0,0 +1,97 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: OpName %[[FooArg:.*]] "unknown_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Foo:.*]] "foo"
+; CHECK-SPIRV-DAG: OpName %[[BarArg:.*]] "known_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Bar:.*]] "bar"
+; CHECK-SPIRV-DAG: OpName %[[UntypedArg:.*]] "arg"
+; CHECK-SPIRV-DAG: OpName %[[FunUntypedArg:.*]] "foo_untyped_arg"
+; CHECK-SPIRV-DAG: OpName %[[UnusedArg1:.*]] "unused_arg1"
+; CHECK-SPIRV-DAG: OpName %[[Foo2Arg:.*]] "unknown_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Foo2:.*]] "foo2"
+; CHECK-SPIRV-DAG: OpName %[[Bar2Arg:.*]] "known_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Bar2:.*]] "bar2"
+; CHECK-SPIRV-DAG: OpName %[[Foo5Arg1:.*]] "unknown_type_ptr1"
+; CHECK-SPIRV-DAG: OpName %[[Foo5Arg2:.*]] "unknown_type_ptr2"
+; CHECK-SPIRV-DAG: OpName %[[Foo5:.*]] "foo5"
+; CHECK-SPIRV-DAG: OpName %[[Bar5Arg:.*]] "known_type_ptr"
+; CHECK-SPIRV-DAG: OpName %[[Bar5:.*]] "bar5"
+; CHECK-SPIRV-DAG: %[[Char:.*]] = OpTypeInt 8 0
+; CHECK-SPIRV-DAG: %[[Long:.*]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[Half:.*]] = OpTypeFloat 16
+; CHECK-SPIRV-DAG: %[[Void:.*]] = OpTypeVoid
+; CHECK-SPIRV-DAG: %[[HalfConst:.*]] = OpConstant %[[Half]] 15360
+; CHECK-SPIRV-DAG: %[[CharPtr:.*]] = OpTypePointer CrossWorkgroup %[[Char]]
+; CHECK-SPIRV-DAG: %[[LongPtr:.*]] = OpTypePointer CrossWorkgroup %[[Long]]
+; CHECK-SPIRV-DAG: %[[Fun:.*]] = OpTypeFunction %[[Void]] %[[LongPtr]]
+; CHECK-SPIRV-DAG: %[[Fun2:.*]] = OpTypeFunction %[[Void]] %[[Half]] %[[LongPtr]]
+; CHECK-SPIRV-DAG: %[[Fun5:.*]] = OpTypeFunction %[[Void]] %[[Half]] %[[LongPtr]] %[[Half]] %[[LongPtr]] %[[Half]]
+; CHECK-SPIRV-DAG: %[[FunUntyped:.*]] = OpTypeFunction %[[Void]] %[[CharPtr]]
+
+; CHECK-SPIRV: %[[Foo]] = OpFunction %[[Void]] None %[[Fun]]
+; CHECK-SPIRV: %[[FooArg]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: %[[Bar]] = OpFunction %[[Void]] None %[[Fun]]
+; CHECK-SPIRV: %[[BarArg]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: OpFunctionCall %[[Void]] %[[Foo]] %[[BarArg]]
+
+; CHECK-SPIRV: %[[FunUntypedArg]] = OpFunction %[[Void]] None %[[FunUntyped]]
+; CHECK-SPIRV: %[[UntypedArg]] = OpFunctionParameter %[[CharPtr]]
+
+; CHECK-SPIRV: %[[Foo2]] = OpFunction %[[Void]] None %[[Fun2]]
+; CHECK-SPIRV: %[[UnusedArg1]] = OpFunctionParameter %[[Half]]
+; CHECK-SPIRV: %[[Foo2Arg]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: %[[Bar2]] = OpFunction %[[Void]] None %[[Fun]]
+; CHECK-SPIRV: %[[Bar2Arg]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: OpFunctionCall %[[Void]] %[[Foo2]] %[[HalfConst]] %[[Bar2Arg]]
+
+; CHECK-SPIRV: %[[Foo5]] = OpFunction %[[Void]] None %[[Fun5]]
+; CHECK-SPIRV: OpFunctionParameter %[[Half]]
+; CHECK-SPIRV: %[[Foo5Arg1]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: OpFunctionParameter %[[Half]]
+; CHECK-SPIRV: %[[Foo5Arg2]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: OpFunctionParameter %[[Half]]
+; CHECK-SPIRV: %[[Bar5]] = OpFunction %[[Void]] None %[[Fun]]
+; CHECK-SPIRV: %[[Bar5Arg]] = OpFunctionParameter %[[LongPtr]]
+; CHECK-SPIRV: OpFunctionCall %[[Void]] %[[Foo5]] %[[HalfConst]] %[[Bar5Arg]] %[[HalfConst]] %[[Bar5Arg]] %[[HalfConst]]
+
+define void @foo(ptr addrspace(1) %unknown_type_ptr) {
+entry:
+  ret void
+}
+
+define spir_kernel void @bar(ptr addrspace(1) %known_type_ptr) {
+entry:
+  %elem = getelementptr inbounds i32, ptr addrspace(1) %known_type_ptr, i64 0
+  call void @foo(ptr addrspace(1) %known_type_ptr)
+  ret void
+}
+
+define void @foo_untyped_arg(ptr addrspace(1) %arg) {
+entry:
+  ret void
+}
+
+define void @foo2(half %unused_arg1, ptr addrspace(1) %unknown_type_ptr) {
+entry:
+  ret void
+}
+
+define spir_kernel void @bar2(ptr addrspace(1) %known_type_ptr) {
+entry:
+  %elem = getelementptr inbounds i32, ptr addrspace(1) %known_type_ptr, i64 0
+  call void @foo2(half 1.0, ptr addrspace(1) %known_type_ptr)
+  ret void
+}
+
+define void @foo5(half %unused_arg1, ptr addrspace(1) %unknown_type_ptr1, half %unused_arg2, ptr addrspace(1) %unknown_type_ptr2, half %unused_arg3) {
+entry:
+  ret void
+}
+
+define spir_kernel void @bar5(ptr addrspace(1) %known_type_ptr) {
+entry:
+  %elem = getelementptr inbounds i32, ptr addrspace(1) %known_type_ptr, i64 0
+  call void @foo5(half 1.0, ptr addrspace(1) %known_type_ptr, half 1.0, ptr addrspace(1) %known_type_ptr, half 1.0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/asm-dialect-module.ll b/llvm/test/CodeGen/X86/asm-dialect-module.ll
new file mode 100644
index 0000000..2c00a44
--- /dev/null
+++ b/llvm/test/CodeGen/X86/asm-dialect-module.ll
@@ -0,0 +1,10 @@
+;; Test that we respect the assembler dialect when parsing module-level inline asm.
+; RUN: not llc < %s -mtriple=x86_64 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: llc < %s -mtriple=x86_64 -x86-asm-syntax=intel | FileCheck %s
+
+; ERR: <inline asm>:1:1: error: unknown use of instruction mnemonic without a size suffix
+
+; CHECK: .intel_syntax noprefix
+; CHECK: mov eax, eax
+
+module asm "mov eax, eax"
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
index 34ef23d..234c7a0a 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -553,8 +553,8 @@ define i8 @v8i32_or_select(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vorps %ymm2, %ymm3, %ymm1
-; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vorps %ymm0, %ymm3, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -571,8 +571,8 @@ define i8 @v8i32_or_select(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:  .LBB7_3:
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm2, %ymm3, %ymm1
-; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpor %ymm0, %ymm3, %ymm0
+; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/combine-sra.ll b/llvm/test/CodeGen/X86/combine-sra.ll
index 0675ced..7eee418 100644
--- a/llvm/test/CodeGen/X86/combine-sra.ll
+++ b/llvm/test/CodeGen/X86/combine-sra.ll
@@ -521,3 +521,276 @@ define <4 x i32> @combine_vec_ashr_positive_splat(<4 x i32> %x, <4 x i32> %y) {
   %2 = ashr <4 x i32> %1, <i32 10, i32 10, i32 10, i32 10>
   ret <4 x i32> %2
 }
+
+define <8 x i16> @combine_vec8i16_ashr_clamped(<8 x i16> %x, <8 x i16> %y) {
+; SSE2-LABEL: combine_vec8i16_ashr_clamped:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT:    psubw %xmm2, %xmm1
+; SSE2-NEXT:    psllw $12, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn %xmm0, %xmm3
+; SSE2-NEXT:    psraw $8, %xmm0
+; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm3, %xmm0
+; SSE2-NEXT:    paddw %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn %xmm0, %xmm3
+; SSE2-NEXT:    psraw $4, %xmm0
+; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm3, %xmm0
+; SSE2-NEXT:    paddw %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn %xmm0, %xmm3
+; SSE2-NEXT:    psraw $2, %xmm0
+; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm3, %xmm0
+; SSE2-NEXT:    paddw %xmm1, %xmm1
+; SSE2-NEXT:    psraw $15, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm0, %xmm2
+; SSE2-NEXT:    psraw $1, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: combine_vec8i16_ashr_clamped:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psllw $12, %xmm0
+; SSE41-NEXT:    psllw $4, %xmm1
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    paddw %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm2, %xmm3
+; SSE41-NEXT:    psraw $8, %xmm3
+; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm3
+; SSE41-NEXT:    psraw $4, %xmm3
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm3
+; SSE41-NEXT:    psraw $2, %xmm3
+; SSE41-NEXT:    paddw %xmm1, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm3
+; SSE41-NEXT:    psraw $1, %xmm3
+; SSE41-NEXT:    paddw %xmm1, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX2-LABEL: combine_vec8i16_ashr_clamped:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: combine_vec8i16_ashr_clamped:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsravw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %1 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %y, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  %2 = ashr <8 x i16> %x, %1
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @combine_vec4i32_ashr_clamped(<4 x i32> %x, <4 x i32> %y) {
+; SSE2-LABEL: combine_vec4i32_ashr_clamped:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn %xmm1, %xmm3
+; SSE2-NEXT:    psrld $27, %xmm2
+; SSE2-NEXT:    por %xmm3, %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    psrad %xmm1, %xmm3
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm2[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrad %xmm4, %xmm1
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm2[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm4
+; SSE2-NEXT:    psrad %xmm3, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    psrad %xmm2, %xmm0
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[0,3]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: combine_vec4i32_ashr_clamped:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    psrad %xmm2, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm0, %xmm5
+; SSE41-NEXT:    psrad %xmm4, %xmm5
+; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm3[0,1,2,3],xmm5[4,5,6,7]
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    psrad %xmm1, %xmm3
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7]
+; SSE41-NEXT:    psrad %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_vec4i32_ashr_clamped:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %y, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
+  %2 = ashr <4 x i32> %x, %1
+  ret <4 x i32> %2
+}
+
+define <4 x i64> @combine_vec4i64_ashr_clamped(<4 x i64> %x, <4 x i64> %y) {
+; SSE2-LABEL: combine_vec4i64_ashr_clamped:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    pxor %xmm5, %xmm4
+; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [2147483711,2147483711,2147483711,2147483711]
+; SSE2-NEXT:    movdqa %xmm7, %xmm8
+; SSE2-NEXT:    pcmpgtd %xmm6, %xmm8
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT:    pand %xmm8, %xmm4
+; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [63,63]
+; SSE2-NEXT:    pand %xmm4, %xmm3
+; SSE2-NEXT:    pandn %xmm6, %xmm4
+; SSE2-NEXT:    por %xmm3, %xmm4
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pxor %xmm5, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2]
+; SSE2-NEXT:    pcmpgtd %xmm8, %xmm7
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm3
+; SSE2-NEXT:    pand %xmm7, %xmm3
+; SSE2-NEXT:    pand %xmm3, %xmm2
+; SSE2-NEXT:    pandn %xmm6, %xmm3
+; SSE2-NEXT:    por %xmm2, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE2-NEXT:    movdqa %xmm2, %xmm5
+; SSE2-NEXT:    psrlq %xmm3, %xmm5
+; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
+; SSE2-NEXT:    movdqa %xmm2, %xmm7
+; SSE2-NEXT:    psrlq %xmm6, %xmm7
+; SSE2-NEXT:    movsd {{.*#+}} xmm7 = xmm5[0],xmm7[1]
+; SSE2-NEXT:    movdqa %xmm0, %xmm5
+; SSE2-NEXT:    psrlq %xmm3, %xmm5
+; SSE2-NEXT:    psrlq %xmm6, %xmm0
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm5[0],xmm0[1]
+; SSE2-NEXT:    xorpd %xmm7, %xmm0
+; SSE2-NEXT:    psubq %xmm7, %xmm0
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    psrlq %xmm4, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
+; SSE2-NEXT:    psrlq %xmm5, %xmm2
+; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    psrlq %xmm4, %xmm3
+; SSE2-NEXT:    psrlq %xmm5, %xmm1
+; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; SSE2-NEXT:    xorpd %xmm2, %xmm1
+; SSE2-NEXT:    psubq %xmm2, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: combine_vec4i64_ashr_clamped:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa %xmm0, %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm7 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT:    movdqa %xmm3, %xmm0
+; SSE41-NEXT:    pxor %xmm7, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [9223372039002259519,9223372039002259519]
+; SSE41-NEXT:    movdqa %xmm8, %xmm6
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm6
+; SSE41-NEXT:    pshufd {{.*#+}} xmm9 = xmm0[0,0,2,2]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483711,2147483711,2147483711,2147483711]
+; SSE41-NEXT:    movdqa %xmm5, %xmm0
+; SSE41-NEXT:    pcmpgtd %xmm9, %xmm0
+; SSE41-NEXT:    pand %xmm6, %xmm0
+; SSE41-NEXT:    movapd {{.*#+}} xmm9 = [63,63]
+; SSE41-NEXT:    movapd %xmm9, %xmm6
+; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm6
+; SSE41-NEXT:    pxor %xmm2, %xmm7
+; SSE41-NEXT:    pcmpeqd %xmm7, %xmm8
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
+; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
+; SSE41-NEXT:    pand %xmm8, %xmm5
+; SSE41-NEXT:    movdqa %xmm5, %xmm0
+; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm9
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    psrlq %xmm9, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[2,3,2,3]
+; SSE41-NEXT:    movdqa %xmm0, %xmm5
+; SSE41-NEXT:    psrlq %xmm3, %xmm5
+; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm2[0,1,2,3],xmm5[4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm4, %xmm2
+; SSE41-NEXT:    psrlq %xmm9, %xmm2
+; SSE41-NEXT:    psrlq %xmm3, %xmm4
+; SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3],xmm4[4,5,6,7]
+; SSE41-NEXT:    pxor %xmm5, %xmm4
+; SSE41-NEXT:    psubq %xmm5, %xmm4
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    psrlq %xmm6, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[2,3,2,3]
+; SSE41-NEXT:    psrlq %xmm3, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm2
+; SSE41-NEXT:    psrlq %xmm6, %xmm2
+; SSE41-NEXT:    psrlq %xmm3, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    pxor %xmm0, %xmm1
+; SSE41-NEXT:    psubq %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm4, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX2-LABEL: combine_vec4i64_ashr_clamped:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [9223372036854775870,9223372036854775870,9223372036854775870,9223372036854775870]
+; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [63,63,63,63]
+; AVX2-NEXT:    vblendvpd %ymm3, %ymm4, %ymm1, %ymm1
+; AVX2-NEXT:    vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: combine_vec4i64_ashr_clamped:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsravq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %1 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %y, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
+  %2 = ashr <4 x i64> %x, %1
+  ret <4 x i64> %2
+}
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
new file mode 100644
index 0000000..c73986e6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -0,0 +1,199 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+@x = thread_local global i32 0, align 4
+@y = internal thread_local global i32 1, align 4
+@z = external hidden thread_local global i32, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll .L0$pb
+; X86-NEXT:  .L0$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp0:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    leal x@tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x@tlscall(%eax)
+; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X32-LABEL: f1:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    #APP
+; X32-NEXT:    #NO_APP
+; X32-NEXT:    leal x@tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x@tlscall(%eax)
+; X32-NEXT:    # kill: def $eax killed $eax def $rax
+; X32-NEXT:    addl %fs:0, %eax
+; X32-NEXT:    #APP
+; X32-NEXT:    #NO_APP
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    leaq x@tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x@tlscall(%rax)
+; X64-NEXT:    addq %fs:0, %rax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"()
+  %b = call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %a.0 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 0
+  %a.1 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 1
+  %a.2 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 2
+  %a.3 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 3
+  %a.4 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 4
+  %a.5 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 5
+  call void asm sideeffect "", "r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %a.0, i32 %a.1, i32 %a.2, i32 %a.3, i32 %a.4, i32 %a.5)
+  ret ptr %b
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L1$pb
+; X86-NEXT:  .L1$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp1:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT:    movl %gs:0, %ecx
+; X86-NEXT:    leal x@tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x@tlscall(%eax)
+; X86-NEXT:    movl (%eax,%ecx), %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f2:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    movl %fs:0, %ecx
+; X32-NEXT:    leal x@tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x@tlscall(%eax)
+; X32-NEXT:    movl (%eax,%ecx), %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %fs:0, %rcx
+; X64-NEXT:    leaq x@tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x@tlscall(%rax)
+; X64-NEXT:    movl (%rax,%rcx), %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %2 = load i32, ptr %1
+  ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L2$pb
+; X86-NEXT:  .L2$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp2:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT:    leal x@tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x@tlscall(%eax)
+; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f3:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    leal x@tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x@tlscall(%eax)
+; X32-NEXT:    # kill: def $eax killed $eax def $rax
+; X32-NEXT:    addl %fs:0, %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    leaq x@tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x@tlscall(%rax)
+; X64-NEXT:    addq %fs:0, %rax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L3$pb
+; X86-NEXT:  .L3$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp3:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT:    movl %gs:0, %edx
+; X86-NEXT:    leal _TLS_MODULE_BASE_@tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *_TLS_MODULE_BASE_@tlscall(%eax)
+; X86-NEXT:    movl y@DTPOFF(%eax,%edx), %ecx
+; X86-NEXT:    addl z@DTPOFF(%eax,%edx), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f4:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    movl %fs:0, %edx
+; X32-NEXT:    leal _TLS_MODULE_BASE_@tlsdesc(%rip), %eax
+; X32-NEXT:    callq *_TLS_MODULE_BASE_@tlscall(%eax)
+; X32-NEXT:    movl y@DTPOFF(%eax,%edx), %ecx
+; X32-NEXT:    addl z@DTPOFF(%eax,%edx), %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f4:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %fs:0, %rdx
+; X64-NEXT:    leaq _TLS_MODULE_BASE_@tlsdesc(%rip), %rax
+; X64-NEXT:    callq *_TLS_MODULE_BASE_@tlscall(%rax)
+; X64-NEXT:    movl y@DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT:    addl z@DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = load i32, ptr @y, align 4
+  %2 = load i32, ptr @z, align 4
+  %3 = add nsw i32 %1, %2
+  ret i32 %3
+}
diff --git a/llvm/test/CodeGen/X86/vpdpwssd.ll b/llvm/test/CodeGen/X86/vpdpwssd.ll
new file mode 100644
index 0000000..e6a07b4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vpdpwssd.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s
+
+define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
+; CHECK-LABEL: vpdpwssd_test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %4 = tail call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
+  ret <16 x i32> %4
+}
diff --git a/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll b/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll
new file mode 100644
index 0000000..9240bf2
--- /dev/null
+++ b/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll
@@ -0,0 +1,84 @@
+; RUN: llc --stop-after=hardware-loops < %s | FileCheck %s
+
+;; Tests that Hardware Loop Insertion does not insert new phi nodes after debug
+;; records when they appear immediately after the last existing phi node.
+
+; CHECK-LABEL: for.body:
+; CHECK-NEXT: = phi i32
+; CHECK-NEXT: = phi i32
+; CHECK-NEXT: call void @llvm.dbg.value
+
+source_filename = "repro.c"
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8.1m.main-arm-none-eabi"
+
+@z = dso_local local_unnamed_addr global i32 42, align 4, !dbg !0
+@arr = dso_local local_unnamed_addr global [10 x i32] zeroinitializer, align 4, !dbg !5
+
+define dso_local void @func1() local_unnamed_addr #0 !dbg !18 {
+entry:
+  %0 = load i32, ptr @z, align 4, !tbaa !26
+  br label %for.body, !dbg !30
+
+for.body:                                         ; preds = %entry, %for.body
+  %p1.04 = phi ptr [ @arr, %entry ], [ %incdec.ptr, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  tail call void @llvm.dbg.value(metadata ptr %p1.04, metadata !23, metadata !DIExpression()), !dbg !25
+  store i32 %0, ptr %p1.04, align 4, !dbg !32, !tbaa !26
+  %inc = add nuw nsw i32 %i.03, 1, !dbg !34
+  %incdec.ptr = getelementptr inbounds i8, ptr %p1.04, i32 4, !dbg !35
+  %exitcond.not = icmp eq i32 %inc, 10, !dbg !36
+  br i1 %exitcond.not, label %for.end, label %for.body, !dbg !30, !llvm.loop !37
+
+for.end:                                          ; preds = %for.body
+  ret void, !dbg !41
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!11, !12, !13, !14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "z", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 19.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "repro.c", directory: "/home/gbtozers/dev/upstream-llvm")
+!4 = !{!0, !5}
+!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression())
+!6 = distinct !DIGlobalVariable(name: "arr", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true)
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 320, elements: !9)
+!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DISubrange(count: 10)
+!11 = !{i32 7, !"Dwarf Version", i32 5}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{i32 1, !"wchar_size", i32 4}
+!14 = !{i32 1, !"min_enum_size", i32 4}
+!15 = !{i32 7, !"frame-pointer", i32 2}
+!16 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!17 = !{!"clang version 19.0.0git"}
+!18 = distinct !DISubprogram(name: "func1", scope: !3, file: !3, line: 4, type: !19, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{null}
+!21 = !{!23}
+!22 = !DILocalVariable(name: "i", scope: !18, file: !3, line: 6, type: !8)
+!23 = !DILocalVariable(name: "p1", scope: !18, file: !3, line: 7, type: !24)
+!24 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 32)
+!25 = !DILocation(line: 0, scope: !18)
+!26 = !{!27, !27, i64 0}
+!27 = !{!"int", !28, i64 0}
+!28 = !{!"omnipotent char", !29, i64 0}
+!29 = !{!"Simple C/C++ TBAA"}
+!30 = !DILocation(line: 8, column: 3, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !18, file: !3, line: 8, column: 3)
+!32 = !DILocation(line: 9, column: 10, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !31, file: !3, line: 8, column: 3)
+!34 = !DILocation(line: 8, column: 27, scope: !33)
+!35 = !DILocation(line: 8, column: 32, scope: !33)
+!36 = !DILocation(line: 8, column: 21, scope: !33)
+!37 = distinct !{!37, !30, !38, !39, !40}
+!38 = !DILocation(line: 9, column: 12, scope: !31)
+!39 = !{!"llvm.loop.mustprogress"}
+!40 = !{!"llvm.loop.unroll.disable"}
+!41 = !DILocation(line: 10, column: 1, scope: !18)
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
index 5d068872..4359d53 100644
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
@@ -85,7 +85,6 @@ exit:
   ret void
 }
 
-; FIXME: The fakeresume1 here should be marked as musttail.
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
index 6ea81c6..2a14be0 100644
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
@@ -88,7 +88,6 @@ exit:
   ret void
 }
 
-; FIXME: The fakeresume1 here should be marked as musttail.
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll b/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll
index 76afc4b..8b387cd 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -78,6 +78,26 @@ entry:
 ; CHECK-LABEL: atomic8_xchg_monotonic
 ; CHECK: call i8 @__tsan_atomic8_exchange(ptr %a, i8 0, i32 0), !dbg
 
+define void @atomic8_xchg_monotonic_ptr(ptr %a, ptr %b) nounwind uwtable {
+entry:
+  atomicrmw xchg ptr %a, ptr %b monotonic, !dbg !7
+  ret void, !dbg !7
+}
+; CHECK-LABEL: atomic8_xchg_monotonic_ptr
+; CHECK: [[ARG:%.*]] = ptrtoint ptr %b to i64, !dbg
+; CHECK: [[RES:%.*]] = call i64 @__tsan_atomic64_exchange(ptr %a, i64 [[ARG]], i32 0), !dbg
+; CHECK: [[CAST:%.*]] = inttoptr i64 [[RES]] to ptr, !dbg
+
+define void @atomic8_xchg_monotonic_float(ptr %a, float %b) nounwind uwtable {
+entry:
+  atomicrmw xchg ptr %a, float %b monotonic, !dbg !7
+  ret void, !dbg !7
+}
+; CHECK-LABEL: atomic8_xchg_monotonic_float
+; CHECK: [[ARG:%.*]] = bitcast float %b to i32, !dbg
+; CHECK: [[RES:%.*]] = call i32 @__tsan_atomic32_exchange(ptr %a, i32 [[ARG]], i32 0), !dbg
+; CHECK: [[CAST:%.*]] = bitcast i32 [[RES]] to float, !dbg
+
 define void @atomic8_add_monotonic(ptr %a) nounwind uwtable {
 entry:
   atomicrmw add ptr %a, i8 0 monotonic, !dbg !7
diff --git a/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll b/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll
index 74d9c86..ccf8cf6 100644
--- a/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll
+++ b/llvm/test/LTO/AArch64/link-branch-target-enforcement.ll
@@ -32,7 +32,6 @@ entry:
 ; CHECK-DUMP: <main>:
 ; CHECK-DUMP:      bl      0x8 <main+0x8>
 ; CHECK-DUMP: <foo>:
-; CHECK-DUMP:     paciasp
 
 ; `main` doesn't support BTI while `foo` does, so in the binary
 ; we should see only PAC which is supported by both.
diff --git a/llvm/test/LTO/AArch64/link-sign-return-address.ll b/llvm/test/LTO/AArch64/link-sign-return-address.ll
deleted file mode 100644
index c25857c..0000000
--- a/llvm/test/LTO/AArch64/link-sign-return-address.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; Testcase to check that module with different branch-target-enforcement can
-; be mixed.
-;
-; RUN: llvm-as %s -o %t1.bc
-; RUN: llvm-as %p/Inputs/foo.ll -o %t2.bc
-; RUN: llvm-lto -exported-symbol main \
-; RUN:          -exported-symbol foo \
-; RUN:          -filetype=obj \
-; RUN:           %t2.bc %t1.bc \
-; RUN:           -o %t1.exe 2>&1
-; RUN: llvm-objdump -d %t1.exe | FileCheck --check-prefix=CHECK-DUMP %s
-; RUN: llvm-readelf -n %t1.exe | FileCheck --allow-empty --check-prefix=CHECK-PROP %s
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-unknown-linux-gnu"
-
-declare i32 @foo();
-
-define i32 @main() {
-entry:
-  %add = call i32 @foo()
-  ret i32 %add
-}
-
-!llvm.module.flags = !{!0, !1, !2, !3 }
-!0 = !{i32 8, !"branch-target-enforcement", i32 0}
-!1 = !{i32 8, !"sign-return-address", i32 0}
-!2 = !{i32 8, !"sign-return-address-all", i32 0}
-!3 = !{i32 8, !"sign-return-address-with-bkey", i32 0}
-
-; CHECK-DUMP: <foo>:
-; CHECK-DUMP:     paciasp
-; CHECK-DUMP:     mov     w0, #0x2a
-; CHECK-DUMP:     autiasp
-; CHECK-DUMP:     ret
-; CHECK-DUMP: <main>:
-; CHECK-DUMP-NOT:  paciasp
-; CHECK-DUMP:      str     x30,
-; CHECK-DUMP:      bl      0x14 <main+0x4>
-
-; `main` doesn't support PAC sign-return-address while `foo` does, so in the binary
-; we should not see anything.
-; CHECK-PROP-NOT:   Properties: aarch64 feature: PAC
-\ No newline at end of file
diff --git a/llvm/test/Linker/link-arm-and-thumb.ll b/llvm/test/Linker/link-arm-and-thumb.ll
index 37bd8c3..a90f212 100644
--- a/llvm/test/Linker/link-arm-and-thumb.ll
+++ b/llvm/test/Linker/link-arm-and-thumb.ll
@@ -13,12 +13,11 @@ entry:
   ret i32 %add
 }
 
-; CHECK: define i32 @main() [[MAIN_ATTRS:#[0-9]+]]
+; CHECK: define i32 @main() {
 ; CHECK: define i32 @foo(i32 %a, i32 %b) [[ARM_ATTRS:#[0-9]+]]
 ; CHECK: define i32 @bar(i32 %a, i32 %b) [[THUMB_ATTRS:#[0-9]+]]
 
-; CHECK: attributes [[MAIN_ATTRS]] = { {{.*}} }
-; CHECK: attributes [[ARM_ATTRS]] = { {{.*}} "target-features"="-thumb-mode" }
-; CHECK: attributes [[THUMB_ATTRS]] = { {{.*}} "target-features"="+thumb-mode" }
+; CHECK: attributes [[ARM_ATTRS]] = { "target-features"="-thumb-mode" }
+; CHECK: attributes [[THUMB_ATTRS]] = { "target-features"="+thumb-mode" }
 
 ; STDERR-NOT: warning: Linking two modules of different target triples:
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_sop1.s
index 8a7f643..c4029b06 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_sop1.s
@@ -2964,6 +2964,9 @@ s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_SAVE_WAVE)
 s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA)
 // GFX11: encoding: [0x85,0x4c,0x80,0xbe]
 
+s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA_TO_PC)
+// GFX11: encoding: [0x86,0x4c,0x80,0xbe]
+
 s_ctz_i32_b32 s5, s1
 // GFX11: encoding: [0x01,0x08,0x85,0xbe]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 4fd355f..939320e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -3708,9 +3708,12 @@ s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_SAVE_WAVE)
 s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA)
 // GFX12: encoding: [0x85,0x4c,0x80,0xbe]
 
-s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_SE_AID_ID)
+s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA_TO_PC)
 // GFX12: encoding: [0x86,0x4c,0x80,0xbe]
 
+s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_SE_AID_ID)
+// GFX12: encoding: [0x87,0x4c,0x80,0xbe]
+
 s_ctz_i32_b32 s5, s1
 // GFX12: encoding: [0x01,0x08,0x85,0xbe]
 
diff --git a/llvm/test/MC/AMDGPU/mcexpr_amd.s b/llvm/test/MC/AMDGPU/mcexpr_amd.s
new file mode 100644
index 0000000..a9639c3a
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mcexpr_amd.s
@@ -0,0 +1,130 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -filetype=obj < %s > %t
+// RUN: llvm-objdump --syms %t | FileCheck --check-prefix=OBJDUMP %s
+
+// OBJDUMP: SYMBOL TABLE:
+// OBJDUMP-NEXT: 0000000000000000 l       *ABS*  0000000000000000 zero
+// OBJDUMP-NEXT: 0000000000000001 l       *ABS*  0000000000000000 one
+// OBJDUMP-NEXT: 0000000000000002 l       *ABS*  0000000000000000 two
+// OBJDUMP-NEXT: 0000000000000003 l       *ABS*  0000000000000000 three
+// OBJDUMP-NEXT: 7fffffffffffffff l       *ABS*  0000000000000000 i64_max
+// OBJDUMP-NEXT: 8000000000000000 l       *ABS*  0000000000000000 i64_min
+// OBJDUMP-NEXT: 0000000000000005 l       *ABS*  0000000000000000 max_expression_all
+// OBJDUMP-NEXT: 0000000000000005 l       *ABS*  0000000000000000 five
+// OBJDUMP-NEXT: 0000000000000004 l       *ABS*  0000000000000000 four
+// OBJDUMP-NEXT: 0000000000000002 l       *ABS*  0000000000000000 max_expression_two
+// OBJDUMP-NEXT: 0000000000000001 l       *ABS*  0000000000000000 max_expression_one
+// OBJDUMP-NEXT: 000000000000000a l       *ABS*  0000000000000000 max_literals
+// OBJDUMP-NEXT: 000000000000000f l       *ABS*  0000000000000000 max_with_max_sym
+// OBJDUMP-NEXT: 000000000000000f l       *ABS*  0000000000000000 max
+// OBJDUMP-NEXT: ffffffffffffffff l       *ABS*  0000000000000000 neg_one
+// OBJDUMP-NEXT: ffffffffffffffff l       *ABS*  0000000000000000 max_neg_numbers
+// OBJDUMP-NEXT: ffffffffffffffff l       *ABS*  0000000000000000 max_neg_number
+// OBJDUMP-NEXT: 0000000000000003 l       *ABS*  0000000000000000 max_with_subexpr
+// OBJDUMP-NEXT: 0000000000000006 l       *ABS*  0000000000000000 max_as_subexpr
+// OBJDUMP-NEXT: 0000000000000005 l       *ABS*  0000000000000000 max_recursive_subexpr
+// OBJDUMP-NEXT: 7fffffffffffffff l       *ABS*  0000000000000000 max_expr_one_max
+// OBJDUMP-NEXT: 7fffffffffffffff l       *ABS*  0000000000000000 max_expr_two_max
+// OBJDUMP-NEXT: 7fffffffffffffff l       *ABS*  0000000000000000 max_expr_three_max
+// OBJDUMP-NEXT: 8000000000000000 l       *ABS*  0000000000000000 max_expr_one_min
+// OBJDUMP-NEXT: 0000000000000003 l       *ABS*  0000000000000000 max_expr_two_min
+// OBJDUMP-NEXT: 0000000000989680 l       *ABS*  0000000000000000 max_expr_three_min
+// OBJDUMP-NEXT: 0000000000000007 l       *ABS*  0000000000000000 or_expression_all
+// OBJDUMP-NEXT: 0000000000000003 l       *ABS*  0000000000000000 or_expression_two
+// OBJDUMP-NEXT: 0000000000000001 l       *ABS*  0000000000000000 or_expression_one
+// OBJDUMP-NEXT: 000000000000000f l       *ABS*  0000000000000000 or_literals
+// OBJDUMP-NEXT: 0000000000000000 l       *ABS*  0000000000000000 or_false
+// OBJDUMP-NEXT: 00000000000000ff l       *ABS*  0000000000000000 or_with_or_sym
+// OBJDUMP-NEXT: 00000000000000ff l       *ABS*  0000000000000000 or
+// OBJDUMP-NEXT: 0000000000000003 l       *ABS*  0000000000000000 or_with_subexpr
+// OBJDUMP-NEXT: 0000000000000008 l       *ABS*  0000000000000000 or_as_subexpr
+// OBJDUMP-NEXT: 0000000000000007 l       *ABS*  0000000000000000 or_recursive_subexpr
+
+// ASM: .set zero, 0
+// ASM: .set one, 1
+// ASM: .set two, 2
+// ASM: .set three, 3
+// ASM: .set i64_max, 9223372036854775807
+// ASM: .set i64_min, -9223372036854775808
+
+.set zero, 0
+.set one, 1
+.set two, 2
+.set three, 3
+.set i64_max, 0x7FFFFFFFFFFFFFFF
+.set i64_min, 0x8000000000000000
+
+// ASM: .set max_expression_all, max(1, 2, five, 3, four)
+// ASM: .set max_expression_two, 2
+// ASM: .set max_expression_one, 1
+// ASM: .set max_literals, 10
+// ASM: .set max_with_max_sym, max(max, 4, 3, 1, 2)
+
+.set max_expression_all, max(one, two, five, three, four)
+.set max_expression_two, max(one, two)
+.set max_expression_one, max(one)
+.set max_literals, max(1,2,3,4,5,6,7,8,9,10)
+.set max_with_max_sym, max(max, 4, 3, one, two)
+
+// ASM: .set max_neg_numbers, -1
+// ASM: .set max_neg_number, -1
+
+.set neg_one, -1
+.set max_neg_numbers, max(-5, -4, -3, -2, neg_one)
+.set max_neg_number, max(neg_one)
+
+// ASM: .set max_with_subexpr, 3
+// ASM: .set max_as_subexpr, 1+(max(4, 3, five))
+// ASM: .set max_recursive_subexpr, max(max(1, four), 3, max_expression_all)
+
+.set max_with_subexpr, max(((one | 3) << 3) / 8)
+.set max_as_subexpr, 1 + max(4, 3, five)
+.set max_recursive_subexpr, max(max(one, four), three, max_expression_all)
+
+// ASM: .set max_expr_one_max, 9223372036854775807
+// ASM: .set max_expr_two_max, max(9223372036854775807, five)
+// ASM: .set max_expr_three_max, max(9223372036854775807, five, 10000000)
+
+.set max_expr_one_max, max(i64_max)
+.set max_expr_two_max, max(i64_max, five)
+.set max_expr_three_max, max(i64_max, five, 10000000)
+
+// ASM: .set max_expr_one_min, -9223372036854775808
+// ASM: .set max_expr_two_min, 3
+// ASM: .set max_expr_three_min, 10000000
+
+.set max_expr_one_min, max(i64_min)
+.set max_expr_two_min, max(i64_min, three)
+.set max_expr_three_min, max(i64_min, three, 10000000)
+
+// ASM: .set or_expression_all, or(1, 2, five, 3, four)
+// ASM: .set or_expression_two, 3
+// ASM: .set or_expression_one, 1
+// ASM: .set or_literals, 15
+// ASM: .set or_false, 0
+// ASM: .set or_with_or_sym, or(or, 4, 3, 1, 2)
+
+.set or_expression_all, or(one, two, five, three, four)
+.set or_expression_two, or(one, two)
+.set or_expression_one, or(one)
+.set or_literals, or(1,2,3,4,5,6,7,8,9,10)
+.set or_false, or(zero, 0, (2-2), 5 > 6)
+.set or_with_or_sym, or(or, 4, 3, one, two)
+
+// ASM: .set or_with_subexpr, 3
+// ASM: .set or_as_subexpr, 1+(or(4, 3, five))
+// ASM: .set or_recursive_subexpr, or(or(1, four), 3, or_expression_all)
+
+.set or_with_subexpr, or(((one | 3) << 3) / 8)
+.set or_as_subexpr, 1 + or(4, 3, five)
+.set or_recursive_subexpr, or(or(one, four), three, or_expression_all)
+
+// ASM: .set four, 4
+// ASM: .set five, 5
+// ASM: .set max, 15
+// ASM: .set or, 255
+
+.set four, 4
+.set five, 5
+.set max, 0xF
+.set or, 0xFF
diff --git a/llvm/test/MC/AMDGPU/mcexpr_amd_err.s b/llvm/test/MC/AMDGPU/mcexpr_amd_err.s
new file mode 100644
index 0000000..ea02e01
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mcexpr_amd_err.s
@@ -0,0 +1,53 @@
+// RUN: not llvm-mc -triple amdgcn-amd-amdhsa %s 2>&1 | FileCheck --check-prefix=ASM %s
+
+.set one, 1
+.set two, 2
+.set three, 3
+
+.set max_empty, max()
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: empty max expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set or_empty, or()
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: empty or expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_post_aux_comma, max(one,)
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: mismatch of commas in max expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_pre_aux_comma, max(,one)
+// asm: :[[@line-1]]:{{[0-9]+}}: error: unknown token in expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_double_comma, max(one,, two)
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_no_comma, max(one two)
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unexpected token in max expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_missing_paren, max(two
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unexpected token in max expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_expression_one, max(three, four,
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set or_expression_one, or(four, five
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unexpected token in or expression
+// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression
+
+.set max_no_lparen, max four, five)
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: expected newline
+
+.set max_no_paren, max one, two, three
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: expected newline
+
+.set max_rparen_only, max)
+// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: expected newline
+
+.set four, 4
+.set five, 5
diff --git a/llvm/test/MC/BPF/insn-unit.s b/llvm/test/MC/BPF/insn-unit.s
index 224eb73..84735d1 100644
--- a/llvm/test/MC/BPF/insn-unit.s
+++ b/llvm/test/MC/BPF/insn-unit.s
@@ -65,8 +65,10 @@
 // CHECK: 8d 02 00 00 00 00 00 00 	callx r2
 
 // ======== BPF_JMP Class ========
+  may_goto Llabel0 // BPF_JCOND | BPF_K
   if r1 & r2 goto Llabel0    // BPF_JSET  | BPF_X
   if r1 & 0xffff goto Llabel0    // BPF_JSET  | BPF_K
+// CHECK: e5 00 1e 00 00 00 00 00	may_goto +30
 // CHECK: 4d 21 1d 00 00 00 00 00 	if r1 & r2 goto +29
 // CHECK: 45 01 1c 00 ff ff 00 00 	if r1 & 65535 goto +28
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sop1.txt
index fbb9545..929f3d2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sop1.txt
@@ -2536,6 +2536,9 @@
 # GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA) ; encoding: [0x85,0x4c,0x80,0xbe]
 0x85,0x4c,0x80,0xbe
 
+# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA_TO_PC) ; encoding: [0x86,0x4c,0x80,0xbe]
+0x86,0x4c,0x80,0xbe
+
 # GFX11: s_setpc_b64 s[0:1]                      ; encoding: [0x00,0x48,0x80,0xbe]
 0x00,0x48,0x80,0xbe
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index c87cea1..f8c235f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -3279,9 +3279,12 @@
 # GFX12: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA) ; encoding: [0x85,0x4c,0x80,0xbe]
 0x85,0x4c,0x80,0xbe
 
-# GFX12: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_SE_AID_ID) ; encoding: [0x86,0x4c,0x80,0xbe]
+# GFX12: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA_TO_PC) ; encoding: [0x86,0x4c,0x80,0xbe]
 0x86,0x4c,0x80,0xbe
 
+# GFX12: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_SE_AID_ID) ; encoding: [0x87,0x4c,0x80,0xbe]
+0x87,0x4c,0x80,0xbe
+
 # GFX12: s_setpc_b64 s[0:1]                      ; encoding: [0x00,0x48,0x80,0xbe]
 0x00,0x48,0x80,0xbe
 
diff --git a/llvm/test/TableGen/MacroFusion.td b/llvm/test/TableGen/MacroFusion.td
index ce76e7f..05c970c 100644
--- a/llvm/test/TableGen/MacroFusion.td
+++ b/llvm/test/TableGen/MacroFusion.td
@@ -46,11 +46,21 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion",
                               CheckRegOperand<0, X0>
                              ]>>;
 
+let IsCommutable = 1 in
+def TestCommutableFusion: SimpleFusion<"test-commutable-fusion", "HasTestCommutableFusion",
+                                       "Test Commutable Fusion",
+                                       CheckOpcode<[Inst0]>,
+                                       CheckAll<[
+                                        CheckOpcode<[Inst1]>,
+                                        CheckRegOperand<0, X0>
+                                       ]>>;
+
 // CHECK-PREDICATOR:       #ifdef GET_Test_MACRO_FUSION_PRED_DECL
 // CHECK-PREDICATOR-NEXT:  #undef GET_Test_MACRO_FUSION_PRED_DECL
 // CHECK-PREDICATOR-EMPTY:
 // CHECK-PREDICATOR-NEXT:  namespace llvm {
 // CHECK-PREDICATOR-NEXT:  bool isTestBothFusionPredicate(const TargetInstrInfo &, const TargetSubtargetInfo &, const MachineInstr *, const MachineInstr &);
+// CHECK-PREDICATOR-NEXT:  bool isTestCommutableFusion(const TargetInstrInfo &, const TargetSubtargetInfo &, const MachineInstr *, const MachineInstr &);   
 // CHECK-PREDICATOR-NEXT:  bool isTestFusion(const TargetInstrInfo &, const TargetSubtargetInfo &, const MachineInstr *, const MachineInstr &);
 // CHECK-PREDICATOR-NEXT:  } // end namespace llvm
 // CHECK-PREDICATOR-EMPTY:
@@ -78,7 +88,7 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion",
 // CHECK-PREDICATOR-NEXT:    }
 // CHECK-PREDICATOR-NEXT:    return true;
 // CHECK-PREDICATOR-NEXT:  }
-// CHECK-PREDICATOR-NEXT:  bool isTestFusion(
+// CHECK-PREDICATOR-NEXT:  bool isTestCommutableFusion(
 // CHECK-PREDICATOR-NEXT:      const TargetInstrInfo &TII,
 // CHECK-PREDICATOR-NEXT:      const TargetSubtargetInfo &STI,
 // CHECK-PREDICATOR-NEXT:      const MachineInstr *FirstMI,
@@ -99,14 +109,58 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion",
 // CHECK-PREDICATOR-NEXT:      if (( MI->getOpcode() != Test::Inst0 ))
 // CHECK-PREDICATOR-NEXT:        return false;
 // CHECK-PREDICATOR-NEXT:    }
+// CHECK-PREDICATOR-NEXT:    if (!SecondMI.getOperand(0).getReg().isVirtual()) {
+// CHECK-PREDICATOR-NEXT:      if (SecondMI.getOperand(0).getReg() != SecondMI.getOperand(1).getReg()) {
+// CHECK-PREDICATOR-NEXT:        if (!SecondMI.getDesc().isCommutable())
+// CHECK-PREDICATOR-NEXT:          return false;
+// CHECK-PREDICATOR-NEXT:        unsigned SrcOpIdx1 = 1, SrcOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
+// CHECK-PREDICATOR-NEXT:        if (TII.findCommutedOpIndices(SecondMI, SrcOpIdx1, SrcOpIdx2))
+// CHECK-PREDICATOR-NEXT:          if (SecondMI.getOperand(0).getReg() != SecondMI.getOperand(SrcOpIdx2).getReg())
+// CHECK-PREDICATOR-NEXT:            return false;
+// CHECK-PREDICATOR-NEXT:      }
+// CHECK-PREDICATOR-NEXT:    }
+// CHECK-PREDICATOR-NEXT:    {
+// CHECK-PREDICATOR-NEXT:      Register FirstDest = FirstMI->getOperand(0).getReg();
+// CHECK-PREDICATOR-NEXT:      if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))
+// CHECK-PREDICATOR-NEXT:        return false;
+// CHECK-PREDICATOR-NEXT:    }
+// CHECK-PREDICATOR-NEXT:    if (!(FirstMI->getOperand(0).isReg() &&
+// CHECK-PREDICATOR-NEXT:          SecondMI.getOperand(1).isReg() &&
+// CHECK-PREDICATOR-NEXT:          FirstMI->getOperand(0).getReg() == SecondMI.getOperand(1).getReg())) {
+// CHECK-PREDICATOR-NEXT:      if (!SecondMI.getDesc().isCommutable())
+// CHECK-PREDICATOR-NEXT:        return false;
+// CHECK-PREDICATOR-NEXT:      unsigned SrcOpIdx1 = 1, SrcOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
+// CHECK-PREDICATOR-NEXT:      if (TII.findCommutedOpIndices(SecondMI, SrcOpIdx1, SrcOpIdx2))
+// CHECK-PREDICATOR-NEXT:        if (FirstMI->getOperand(0).getReg() != SecondMI.getOperand(SrcOpIdx2).getReg())
+// CHECK-PREDICATOR-NEXT:          return false;
+// CHECK-PREDICATOR-NEXT:    }
+// CHECK-PREDICATOR-NEXT:    return true;
+// CHECK-PREDICATOR-NEXT:  }
+// CHECK-PREDICATOR-NEXT:  bool isTestFusion(
+// CHECK-PREDICATOR-NEXT:      const TargetInstrInfo &TII,
+// CHECK-PREDICATOR-NEXT:      const TargetSubtargetInfo &STI,
+// CHECK-PREDICATOR-NEXT:      const MachineInstr *FirstMI,
+// CHECK-PREDICATOR-NEXT:      const MachineInstr &SecondMI) {
+// CHECK-PREDICATOR-NEXT:    auto &MRI = SecondMI.getMF()->getRegInfo();
 // CHECK-PREDICATOR-NEXT:    {
 // CHECK-PREDICATOR-NEXT:      const MachineInstr *MI = &SecondMI;
 // CHECK-PREDICATOR-NEXT:      if (!(
-// CHECK-PREDICATOR-NEXT:          MI->getOperand(0).getReg().isVirtual()
-// CHECK-PREDICATOR-NEXT:          || MI->getOperand(0).getReg() == MI->getOperand(1).getReg()
+// CHECK-PREDICATOR-NEXT:          ( MI->getOpcode() == Test::Inst1 )
+// CHECK-PREDICATOR-NEXT:          && MI->getOperand(0).getReg() == Test::X0
 // CHECK-PREDICATOR-NEXT:        ))
 // CHECK-PREDICATOR-NEXT:        return false;
 // CHECK-PREDICATOR-NEXT:    }
+// CHECK-PREDICATOR-NEXT:    if (!FirstMI)
+// CHECK-PREDICATOR-NEXT:      return true;
+// CHECK-PREDICATOR-NEXT:    {
+// CHECK-PREDICATOR-NEXT:      const MachineInstr *MI = FirstMI;
+// CHECK-PREDICATOR-NEXT:      if (( MI->getOpcode() != Test::Inst0 ))
+// CHECK-PREDICATOR-NEXT:        return false;
+// CHECK-PREDICATOR-NEXT:    }
+// CHECK-PREDICATOR-NEXT:    if (!SecondMI.getOperand(0).getReg().isVirtual()) {
+// CHECK-PREDICATOR-NEXT:      if (SecondMI.getOperand(0).getReg() != SecondMI.getOperand(1).getReg())
+// CHECK-PREDICATOR-NEXT:        return false;
+// CHECK-PREDICATOR-NEXT:    }
 // CHECK-PREDICATOR-NEXT:    {
 // CHECK-PREDICATOR-NEXT:      Register FirstDest = FirstMI->getOperand(0).getReg();
 // CHECK-PREDICATOR-NEXT:      if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))
@@ -131,6 +185,7 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion",
 // CHECK-SUBTARGET:      std::vector<MacroFusionPredTy> TestGenSubtargetInfo::getMacroFusions() const {
 // CHECK-SUBTARGET-NEXT:   std::vector<MacroFusionPredTy> Fusions;
 // CHECK-SUBTARGET-NEXT:   if (hasFeature(Test::TestBothFusionPredicate)) Fusions.push_back(llvm::isTestBothFusionPredicate); 
+// CHECK-SUBTARGET-NEXT:   if (hasFeature(Test::TestCommutableFusion)) Fusions.push_back(llvm::isTestCommutableFusion); 
 // CHECK-SUBTARGET-NEXT:   if (hasFeature(Test::TestFusion)) Fusions.push_back(llvm::isTestFusion);
 // CHECK-SUBTARGET-NEXT:   return Fusions;
 // CHECK-SUBTARGET-NEXT: }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index 9c2b1ec..d9dba92 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -88,7 +88,6 @@ exit:
   ret void
 }
 
-; FIXME: The fakeresume1 here should be marked as musttail.
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index 860032b..2257d5a 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -88,7 +88,6 @@ exit:
   ret void
 }
 
-; FIXME: The fakeresume1 here should be marked as musttail.
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll
index 3961fec..76f5248 100644
--- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll
+++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll
@@ -15,12 +15,12 @@ define half @si129tohalf(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 25, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i32 26, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
@@ -100,12 +100,12 @@ define float @si129tofloat(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 25, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i32 26, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
@@ -184,12 +184,12 @@ define double @si129todouble(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 53
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 53
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 54, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i32 55, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i32 54, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i32 55, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
@@ -273,12 +273,12 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i129 114, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i129 115, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
@@ -357,12 +357,12 @@ define fp128 @si129tofp128(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i129 114, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i129 115, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll
index e05ff19..96d87a5 100644
--- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll
+++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll
@@ -15,12 +15,12 @@ define half @ui129tohalf(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 25, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i32 26, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
@@ -100,12 +100,12 @@ define float @ui129tofloat(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 25, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i32 26, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
@@ -184,12 +184,12 @@ define double @ui129todouble(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 53
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 53
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 54, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i32 55, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i32 54, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i32 55, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
@@ -273,12 +273,12 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i129 114, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i129 115, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
@@ -357,12 +357,12 @@ define fp128 @ui129tofp128(i129 %a) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i129 114, label [[ITOFP_SW_BB:%.*]]
-; CHECK-NEXT:    i129 115, label [[ITOFP_SW_EPILOG:%.*]]
+; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
+; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
index 0e21bf8..59a0241 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
@@ -493,3 +493,55 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %cmp = icmp ult i32 %add, %length
   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
 }
+
+; Test that we can handle shl and disjoint or in getExtendedOperandRecurrence.
+define void @foo7(i32 %n, ptr %a, i32 %x) {
+; CHECK-LABEL: @foo7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[ADD1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP2]], 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+;
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add1 = add nsw i32 %x, 2
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add2, %for.body ]
+  %mul = shl nsw i32 %i.07, 1
+  %add = or disjoint i32 %mul, 1
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+  store i32 %i.07, ptr %arrayidx, align 4
+  %add2 = add nsw i32 %add1, %i.07
+  %cmp = icmp slt i32 %add2, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll
index f796273..c72e4ac 100644
--- a/llvm/test/Transforms/InstCombine/binop-itofp.ll
+++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll
@@ -1004,3 +1004,123 @@ define float @test_ui_add_with_signed_constant(i32 %shr.i) {
   %add = fadd float %sub, -16383.0
   ret float %add
 }
+
+
+;; Reduced form of bug noticed due to #82555
+define float @missed_nonzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345) {
+; CHECK-LABEL: @missed_nonzero_check_on_constant_for_si_fmul(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
+; CHECK-NEXT:    [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], 0.000000e+00
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
+; CHECK-NEXT:    ret float [[MUL3_I_I]]
+;
+  %sel = select i1 %c, i32 65529, i32 53264
+  %conv.i = trunc i32 %sel to i16
+  %conv1.i = sitofp i16 %conv.i to float
+  %mul3.i.i = fmul float %conv1.i, 0.000000e+00
+  store i32 %sel, ptr %g_2345, align 4
+  ret float %mul3.i.i
+}
+
+define <2 x float> @missed_nonzero_check_on_constant_for_si_fmul_vec(i1 %c, i1 %.b, ptr %g_2345) {
+; CHECK-LABEL: @missed_nonzero_check_on_constant_for_si_fmul_vec(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
+; CHECK-NEXT:    [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
+; CHECK-NEXT:    [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
+; CHECK-NEXT:    [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], zeroinitializer
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
+; CHECK-NEXT:    ret <2 x float> [[MUL3_I_I]]
+;
+  %sel = select i1 %c, i32 65529, i32 53264
+  %conv.i.s = trunc i32 %sel to i16
+  %conv.i.v = insertelement <2 x i16> poison, i16 %conv.i.s, i64 0
+  %conv.i = insertelement <2 x i16> %conv.i.v, i16 %conv.i.s, i64 1
+  %conv1.i = sitofp <2 x i16> %conv.i to <2 x float>
+  %mul3.i.i = fmul <2 x float> %conv1.i, zeroinitializer
+  store i32 %sel, ptr %g_2345, align 4
+  ret <2 x float> %mul3.i.i
+}
+
+define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345) {
+; CHECK-LABEL: @negzero_check_on_constant_for_si_fmul(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
+; CHECK-NEXT:    [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], -0.000000e+00
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
+; CHECK-NEXT:    ret float [[MUL3_I_I]]
+;
+  %sel = select i1 %c, i32 65529, i32 53264
+  %conv.i = trunc i32 %sel to i16
+  %conv1.i = sitofp i16 %conv.i to float
+  %mul3.i.i = fmul float %conv1.i, -0.000000e+00
+  store i32 %sel, ptr %g_2345, align 4
+  ret float %mul3.i.i
+}
+
+define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_undef(i1 %c, i1 %.b, ptr %g_2345) {
+; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_vec_w_undef(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
+; CHECK-NEXT:    [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
+; CHECK-NEXT:    [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
+; CHECK-NEXT:    [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float 0.000000e+00>
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
+; CHECK-NEXT:    ret <2 x float> [[MUL3_I_I]]
+;
+  %sel = select i1 %c, i32 65529, i32 53264
+  %conv.i.s = trunc i32 %sel to i16
+  %conv.i.v = insertelement <2 x i16> poison, i16 %conv.i.s, i64 0
+  %conv.i = insertelement <2 x i16> %conv.i.v, i16 %conv.i.s, i64 1
+  %conv1.i = sitofp <2 x i16> %conv.i to <2 x float>
+  %mul3.i.i = fmul <2 x float> %conv1.i, <float undef, float 0.000000e+00>
+  store i32 %sel, ptr %g_2345, align 4
+  ret <2 x float> %mul3.i.i
+}
+
+define <2 x float> @nonzero_check_on_constant_for_si_fmul_nz_vec_w_undef(i1 %c, i1 %.b, ptr %g_2345) {
+; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_nz_vec_w_undef(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
+; CHECK-NEXT:    [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
+; CHECK-NEXT:    [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
+; CHECK-NEXT:    [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float 1.000000e+00>
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
+; CHECK-NEXT:    ret <2 x float> [[MUL3_I_I]]
+;
+  %sel = select i1 %c, i32 65529, i32 53264
+  %conv.i.s = trunc i32 %sel to i16
+  %conv.i.v = insertelement <2 x i16> poison, i16 %conv.i.s, i64 0
+  %conv.i = insertelement <2 x i16> %conv.i.v, i16 %conv.i.s, i64 1
+  %conv1.i = sitofp <2 x i16> %conv.i to <2 x float>
+  %mul3.i.i = fmul <2 x float> %conv1.i, <float undef, float 1.000000e+00>
+  store i32 %sel, ptr %g_2345, align 4
+  ret <2 x float> %mul3.i.i
+}
+
+define <2 x float> @nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef(i1 %c, i1 %.b, ptr %g_2345) {
+; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
+; CHECK-NEXT:    [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
+; CHECK-NEXT:    [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
+; CHECK-NEXT:    [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float -0.000000e+00>
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
+; CHECK-NEXT:    ret <2 x float> [[MUL3_I_I]]
+;
+  %sel = select i1 %c, i32 65529, i32 53264
+  %conv.i.s = trunc i32 %sel to i16
+  %conv.i.v = insertelement <2 x i16> poison, i16 %conv.i.s, i64 0
+  %conv.i = insertelement <2 x i16> %conv.i.v, i16 %conv.i.s, i64 1
+  %conv1.i = sitofp <2 x i16> %conv.i to <2 x float>
+  %mul3.i.i = fmul <2 x float> %conv1.i, <float undef, float -0.000000e+00>
+  store i32 %sel, ptr %g_2345, align 4
+  ret <2 x float> %mul3.i.i
+}
diff --git a/llvm/test/Transforms/InstCombine/extract-select-agg.ll b/llvm/test/Transforms/InstCombine/extract-select-agg.ll
new file mode 100644
index 0000000..6ba6b1a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/extract-select-agg.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i64 @test_select_agg_constant_agg(i64 %val, i1 %cond) {
+; CHECK-LABEL: define i64 @test_select_agg_constant_agg(
+; CHECK-SAME: i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET:%.*]] = zext i1 [[COND]] to i64
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+entry:
+  %sel = select i1 %cond, { i64, i64 } {i64 1, i64 2}, { i64, i64 } {i64 0, i64 3}
+  %ret = extractvalue { i64, i64 } %sel, 0
+  ret i64 %ret
+}
+
+define void @test_select_agg_constant_agg_multiuse(i64 %val, i1 %cond) {
+; CHECK-LABEL: define void @test_select_agg_constant_agg_multiuse(
+; CHECK-SAME: i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET:%.*]] = zext i1 [[COND]] to i64
+; CHECK-NEXT:    call void @use(i64 [[RET]])
+; CHECK-NEXT:    [[V1:%.*]] = select i1 [[COND]], i64 2, i64 3
+; CHECK-NEXT:    call void @use(i64 [[V1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %sel = select i1 %cond, { i64, i64 } {i64 1, i64 2}, { i64, i64 } {i64 0, i64 3}
+  %v0 = extractvalue { i64, i64 } %sel, 0
+  call void @use(i64 %v0)
+  %v1 = extractvalue { i64, i64 } %sel, 1
+  call void @use(i64 %v1)
+  ret void
+}
+
+; TODO: it can be folded to zext i1 %cond to i64
+define i64 @test_select_agg_constant(i64 %val, i1 %cond) {
+; CHECK-LABEL: define i64 @test_select_agg_constant(
+; CHECK-SAME: i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = insertvalue { i64, i64 } { i64 1, i64 poison }, i64 [[VAL]], 1
+; CHECK-NEXT:    [[B:%.*]] = insertvalue { i64, i64 } { i64 0, i64 poison }, i64 [[VAL]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], { i64, i64 } [[A]], { i64, i64 } [[B]]
+; CHECK-NEXT:    [[RET:%.*]] = extractvalue { i64, i64 } [[SEL]], 0
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+entry:
+  %a = insertvalue { i64, i64 } { i64 1, i64 poison }, i64 %val, 1
+  %b = insertvalue { i64, i64 } { i64 0, i64 poison }, i64 %val, 1
+  %sel = select i1 %cond, { i64, i64 } %a, { i64, i64 } %b
+  %ret = extractvalue { i64, i64 } %sel, 0
+  ret i64 %ret
+}
+
+define void @test_select_agg_multiuse(i1 %cond, i64 %v1, i64 %v2, i64 %v3, i64 %v4) {
+; CHECK-LABEL: define void @test_select_agg_multiuse(
+; CHECK-SAME: i1 [[COND:%.*]], i64 [[V1:%.*]], i64 [[V2:%.*]], i64 [[V3:%.*]], i64 [[V4:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A0:%.*]] = insertvalue { i64, i64 } poison, i64 [[V1]], 0
+; CHECK-NEXT:    [[A1:%.*]] = insertvalue { i64, i64 } [[A0]], i64 [[V2]], 1
+; CHECK-NEXT:    [[B0:%.*]] = insertvalue { i64, i64 } poison, i64 [[V3]], 0
+; CHECK-NEXT:    [[B1:%.*]] = insertvalue { i64, i64 } [[B0]], i64 [[V4]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], { i64, i64 } [[A1]], { i64, i64 } [[B1]]
+; CHECK-NEXT:    [[X:%.*]] = extractvalue { i64, i64 } [[SEL]], 0
+; CHECK-NEXT:    call void @use(i64 [[X]])
+; CHECK-NEXT:    [[Y:%.*]] = extractvalue { i64, i64 } [[SEL]], 1
+; CHECK-NEXT:    call void @use(i64 [[Y]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a0 = insertvalue { i64, i64 } poison, i64 %v1, 0
+  %a1 = insertvalue { i64, i64 } %a0, i64 %v2, 1
+  %b0 = insertvalue { i64, i64 } poison, i64 %v3, 0
+  %b1 = insertvalue { i64, i64 } %b0, i64 %v4, 1
+  %sel = select i1 %cond, { i64, i64 } %a1, { i64, i64 } %b1
+  %x = extractvalue { i64, i64 } %sel, 0
+  call void @use(i64 %x)
+  %y = extractvalue { i64, i64 } %sel, 1
+  call void @use(i64 %y)
+  ret void
+}
+
+declare void @use(i64)
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 0706092..410b6c2 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -226,12 +226,11 @@ define i1 @src_is_mask_shl_lshr(i8 %x_in, i8 %y, i1 %cond) {
 
 define i1 @src_is_mask_shl_lshr_fail_not_allones(i8 %x_in, i8 %y, i1 %cond) {
 ; CHECK-LABEL: @src_is_mask_shl_lshr_fail_not_allones(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[TMP1]], -2
-; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[MASK]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[TMP3]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -572,11 +571,10 @@ define i1 @src_is_notmask_neg_p2(i8 %x_in, i8 %y) {
 
 define i1 @src_is_notmask_neg_p2_fail_not_invertable(i8 %x_in, i8 %y) {
 ; CHECK-LABEL: @src_is_notmask_neg_p2_fail_not_invertable(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index e7141d7..a176d16 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -2049,3 +2049,94 @@ define i32 @zext_negpow2_use(i8 %x) {
   %r = mul i32 %zx, -16777216 ; -1 << 24
   ret i32 %r
 }
+
+define i32 @mul_sext_icmp_with_zero(i32 %x) {
+; CHECK-LABEL: @mul_sext_icmp_with_zero(
+; CHECK-NEXT:    ret i32 0
+;
+  %cmp = icmp eq i32 %x, 0
+  %sext = sext i1 %cmp to i32
+  %mul = mul i32 %sext, %x
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_bool(i1 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_bool(
+; CHECK-NEXT:    [[Y_NEG:%.*]] = sub i32 0, [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = select i1 [[X:%.*]], i32 [[Y_NEG]], i32 0
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %sext = sext i1 %x to i32
+  %mul = mul i32 %sext, %y
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_bool_nuw(i1 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_bool_nuw(
+; CHECK-NEXT:    [[Y_NEG:%.*]] = sub i32 0, [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = select i1 [[X:%.*]], i32 [[Y_NEG]], i32 0
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %sext = sext i1 %x to i32
+  %mul = mul nuw i32 %sext, %y
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_bool_nsw(i1 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_bool_nsw(
+; CHECK-NEXT:    [[Y_NEG:%.*]] = sub nsw i32 0, [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = select i1 [[X:%.*]], i32 [[Y_NEG]], i32 0
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %sext = sext i1 %x to i32
+  %mul = mul nsw i32 %sext, %y
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_bool_nuw_nsw(i1 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_bool_nuw_nsw(
+; CHECK-NEXT:    [[Y_NEG:%.*]] = sub nsw i32 0, [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = select i1 [[X:%.*]], i32 [[Y_NEG]], i32 0
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %sext = sext i1 %x to i32
+  %mul = mul nuw nsw i32 %sext, %y
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_bool_commuted(i1 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_bool_commuted(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[Y:%.*]], -2
+; CHECK-NEXT:    [[YY_NEG1:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    [[MUL:%.*]] = select i1 [[X:%.*]], i32 [[YY_NEG1]], i32 0
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %yy = xor i32 %y, 1
+  %sext = sext i1 %x to i32
+  %mul = mul i32 %yy, %sext
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_nonbool(i2 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_nonbool(
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i2 [[X:%.*]] to i32
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[SEXT]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %sext = sext i2 %x to i32
+  %mul = mul i32 %sext, %y
+  ret i32 %mul
+}
+
+define i32 @test_mul_sext_multiuse(i1 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_sext_multiuse(
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT:    tail call void @use(i32 [[SEXT]])
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[SEXT]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[MUL]]
+;
+  %sext = sext i1 %x to i32
+  tail call void @use(i32 %sext)
+  %mul = mul i32 %sext, %y
+  ret i32 %mul
+}
diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll
index f277d13..98b5d980 100644
--- a/llvm/test/Transforms/InstCombine/not.ll
+++ b/llvm/test/Transforms/InstCombine/not.ll
@@ -3,6 +3,8 @@
 
 declare void @use1(i1)
 declare void @use8(i8)
+declare void @f1()
+declare void @f2()
 
 define i32 @test1(i32 %A) {
 ; CHECK-LABEL: @test1(
@@ -858,3 +860,204 @@ define i32 @test_zext(i32 %a, i32 %b){
   %not = xor i32 %add, -1
   ret i32 %not
 }
+
+define void @test_invert_demorgan_or(i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: @test_invert_demorgan_or(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[B:%.*]], 0
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[B1:%.*]], 0
+; CHECK-NEXT:    [[OR_NOT1:%.*]] = and i1 [[CMP2]], [[CMP3]]
+; CHECK-NEXT:    [[MERGE:%.*]] = and i1 [[OR_NOT1]], [[COND:%.*]]
+; CHECK-NEXT:    br i1 [[MERGE]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp ne i32 %b, 0
+  %or = or i1 %cmp1, %cmp2
+  %not = xor i1 %cond, true
+  %merge = or i1 %not, %or
+  br i1 %merge, label %if.then, label %if.else
+if.then:
+  call void @f1()
+  unreachable
+if.else:
+  call void @f2()
+  unreachable
+}
+
+define i1 @test_invert_demorgan_or2(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @test_invert_demorgan_or2(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[A:%.*]], 24
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[B:%.*]], 60
+; CHECK-NEXT:    [[OR1_NOT1:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ult i64 [[C:%.*]], 60
+; CHECK-NEXT:    [[NOT:%.*]] = and i1 [[OR1_NOT1]], [[CMP3]]
+; CHECK-NEXT:    ret i1 [[NOT]]
+;
+  %cmp1 = icmp ugt i64 %a, 23
+  %cmp2 = icmp ugt i64 %b, 59
+  %or1 = or i1 %cmp1, %cmp2
+  %cmp3 = icmp ugt i64 %c, 59
+  %or2 = or i1 %or1, %cmp3
+  %not = xor i1 %or2, true
+  ret i1 %not
+}
+
+define i1 @test_invert_demorgan_or3(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_invert_demorgan_or3(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 178206
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], -196608
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[TMP1]], -1506
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[B]], -917760
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[TMP2]], -716213
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[B]], -1114112
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp ult i32 [[TMP3]], -196112
+; CHECK-NEXT:    [[OR1_NOT2:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    [[OR2_NOT1:%.*]] = and i1 [[OR1_NOT2]], [[CMP3]]
+; CHECK-NEXT:    [[NOT:%.*]] = and i1 [[OR2_NOT1]], [[CMP4]]
+; CHECK-NEXT:    ret i1 [[NOT]]
+;
+  %cmp1 = icmp eq i32 %a, 178206
+  %v1 = add i32 %b, -195102
+  %cmp2 = icmp ult i32 %v1, 1506
+  %v2 = add i32 %b, -201547
+  %cmp3 = icmp ult i32 %v2, 716213
+  %v3 = add i32 %b, -918000
+  %cmp4 = icmp ult i32 %v3, 196112
+  %or1 = or i1 %cmp1, %cmp2
+  %or2 = or i1 %or1, %cmp3
+  %or3 = or i1 %or2, %cmp4
+  %not = xor i1 %or3, true
+  ret i1 %not
+}
+
+define i1 @test_invert_demorgan_logical_or(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_invert_demorgan_logical_or(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i64 [[X:%.*]], 27
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[SEL_NOT1:%.*]] = select i1 [[CMP1]], i1 [[CMP2]], i1 false
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i64 [[X]], 0
+; CHECK-NEXT:    [[NOT:%.*]] = and i1 [[CMP3]], [[SEL_NOT1]]
+; CHECK-NEXT:    ret i1 [[NOT]]
+;
+  %cmp1 = icmp eq i64 %x, 27
+  %cmp2 = icmp eq i64 %y, 0
+  %sel = select i1 %cmp1, i1 true, i1 %cmp2
+  %cmp3 = icmp eq i64 %x, 0
+  %or = or i1 %cmp3, %sel
+  %not = xor i1 %or, true
+  ret i1 %not
+}
+
+define i1 @test_invert_demorgan_and(i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: @test_invert_demorgan_and(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[B:%.*]], 0
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[B1:%.*]], 0
+; CHECK-NEXT:    [[AND_NOT1:%.*]] = or i1 [[CMP2]], [[CMP3]]
+; CHECK-NEXT:    [[MERGE:%.*]] = or i1 [[AND_NOT1]], [[COND:%.*]]
+; CHECK-NEXT:    br i1 [[MERGE]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp ne i32 %b, 0
+  %and = and i1 %cmp1, %cmp2
+  %not = xor i1 %cond, true
+  %merge = and i1 %not, %and
+  br i1 %merge, label %if.then, label %if.else
+if.then:
+  call void @f1()
+  unreachable
+if.else:
+  call void @f2()
+  unreachable
+}
+
+define i64 @test_invert_demorgan_and2(i64 %x) {
+; CHECK-LABEL: @test_invert_demorgan_and2(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[X:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = or i64 [[TMP1]], -9223372036854775808
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %add = add i64 %x, 9223372036854775807
+  %and = and i64 %add, 9223372036854775807
+  %sub = xor i64 %and, -1
+  ret i64 %sub
+}
+
+define i1 @test_invert_demorgan_and3(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_invert_demorgan_and3(
+; CHECK-NEXT:    [[ADD:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 4095
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 4095
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %not = xor i32 %a, -1
+  %add = add i32 %b, %not
+  %and = and i32 %add, 4095
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @test_invert_demorgan_logical_and(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_invert_demorgan_logical_and(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i64 [[X:%.*]], 27
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[SEL_NOT1:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP2]]
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i64 [[X]], 0
+; CHECK-NEXT:    [[NOT:%.*]] = and i1 [[CMP3]], [[SEL_NOT1]]
+; CHECK-NEXT:    ret i1 [[NOT]]
+;
+  %cmp1 = icmp eq i64 %x, 27
+  %cmp2 = icmp eq i64 %y, 0
+  %sel = select i1 %cmp1, i1 %cmp2, i1 false
+  %cmp3 = icmp eq i64 %x, 0
+  %or = or i1 %cmp3, %sel
+  %not = xor i1 %or, true
+  ret i1 %not
+}
+
+define i1 @test_invert_demorgan_and_multiuse(i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: @test_invert_demorgan_and_multiuse(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    call void @use1(i1 [[CMP1]])
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[B:%.*]], 0
+; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[CMP2]], [[NOT]]
+; CHECK-NEXT:    [[MERGE:%.*]] = and i1 [[TMP0]], [[CMP1]]
+; CHECK-NEXT:    br i1 [[MERGE]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  call void @use1(i1 %cmp1)
+  %cmp2 = icmp ne i32 %b, 0
+  %and = and i1 %cmp1, %cmp2
+  %not = xor i1 %cond, true
+  %merge = and i1 %not, %and
+  br i1 %merge, label %if.then, label %if.else
+if.then:
+  call void @f1()
+  unreachable
+if.else:
+  call void @f2()
+  unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll
index 89efbb6..43e34c8 100644
--- a/llvm/test/Transforms/InstCombine/powi.ll
+++ b/llvm/test/Transforms/InstCombine/powi.ll
@@ -125,22 +125,55 @@ entry:
   ret double %mul
 }
 
-define double @powi_fmul_powi_no_reassoc(double %x, i32 %y, i32 %z) {
-; CHECK-LABEL: @powi_fmul_powi_no_reassoc(
+; Negative test: Missing reassoc flag on fmul
+define double @powi_fmul_powi_no_reassoc1(double %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @powi_fmul_powi_no_reassoc1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
-; CHECK-NEXT:    [[P2:%.*]] = tail call double @llvm.powi.f64.i32(double [[X]], i32 [[Z:%.*]])
+; CHECK-NEXT:    [[P1:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[P2:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X]], i32 [[Z:%.*]])
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[P2]], [[P1]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
 entry:
-  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
-  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %z)
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %z)
   %mul = fmul double %p2, %p1
   ret double %mul
 }
 
+; Negative test: Missing reassoc flag on 2nd operand
+define double @powi_fmul_powi_no_reassoc2(double %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @powi_fmul_powi_no_reassoc2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[P2:%.*]] = tail call double @llvm.powi.f64.i32(double [[X]], i32 [[Z:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P2]], [[P1]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+entry:
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %z)
+  %mul = fmul reassoc double %p2, %p1
+  ret double %mul
+}
 
+; Negative test: Missing reassoc flag on 1st operand
+define double @powi_fmul_powi_no_reassoc3(double %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @powi_fmul_powi_no_reassoc3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[P2:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X]], i32 [[Z:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P2]], [[P1]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+entry:
+  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %z)
+  %mul = fmul reassoc double %p2, %p1
+  ret double %mul
+}
+
+; All of the fmul and its operands should have the reassoc flags
 define double @powi_fmul_powi(double %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @powi_fmul_powi(
 ; CHECK-NEXT:  entry:
@@ -149,8 +182,8 @@ define double @powi_fmul_powi(double %x, i32 %y, i32 %z) {
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
 entry:
-  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
-  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %z)
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %z)
   %mul = fmul reassoc double %p2, %p1
   ret double %mul
 }
@@ -163,8 +196,8 @@ define double @powi_fmul_powi_fast_on_fmul(double %x, i32 %y, i32 %z) {
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
 entry:
-  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
-  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %z)
+  %p1 = tail call fast double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call fast double @llvm.powi.f64.i32(double %x, i32 %z)
   %mul = fmul fast double %p2, %p1
   ret double %mul
 }
@@ -192,8 +225,23 @@ define double @powi_fmul_powi_same_power(double %x, i32 %y, i32 %z) {
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
 entry:
-  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
-  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
+  %mul = fmul reassoc double %p2, %p1
+  ret double %mul
+}
+
+define double @powi_fmul_powi_different_integer_types(double %x, i32 %y, i16 %z) {
+; CHECK-LABEL: @powi_fmul_powi_different_integer_types(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[P2:%.*]] = tail call reassoc double @llvm.powi.f64.i16(double [[X]], i16 [[Z:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P2]], [[P1]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+entry:
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call reassoc double @llvm.powi.f64.i16(double %x, i16 %z)
   %mul = fmul reassoc double %p2, %p1
   ret double %mul
 }
@@ -201,16 +249,16 @@ entry:
 define double @powi_fmul_powi_use_first(double %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @powi_fmul_powi_use_first(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[P1:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    tail call void @use(double [[P1]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[Y]], [[Z:%.*]]
 ; CHECK-NEXT:    [[MUL:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[X]], i32 [[TMP0]])
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
 entry:
-  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
   tail call void @use(double %p1)
-  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %z)
+  %p2 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %z)
   %mul = fmul reassoc double %p1, %p2
   ret double %mul
 }
@@ -218,16 +266,16 @@ entry:
 define double @powi_fmul_powi_use_second(double %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @powi_fmul_powi_use_second(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Z:%.*]])
+; CHECK-NEXT:    [[P1:%.*]] = tail call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Z:%.*]])
 ; CHECK-NEXT:    tail call void @use(double [[P1]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], [[Z]]
 ; CHECK-NEXT:    [[MUL:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[X]], i32 [[TMP0]])
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
 entry:
-  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 %z)
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %z)
   tail call void @use(double %p1)
-  %p2 = tail call double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p2 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
   %mul = fmul reassoc double %p2, %p1
   ret double %mul
 }
@@ -333,11 +381,60 @@ define double @fdiv_pow_powi_negative(double %x) {
 ; Negative test: The 2nd powi argument is a variable
 define double @fdiv_pow_powi_negative_variable(double %x, i32 %y) {
 ; CHECK-LABEL: @fdiv_pow_powi_negative_variable(
-; CHECK-NEXT:    [[P1:%.*]] = call double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[P1:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc nnan double [[P1]], [[X]]
 ; CHECK-NEXT:    ret double [[DIV]]
 ;
-  %p1 = call double @llvm.powi.f64.i32(double %x, i32 %y)
+  %p1 = call reassoc double @llvm.powi.f64.i32(double %x, i32 %y)
   %div = fdiv reassoc nnan double %p1, %x
   ret double %div
 }
+
+; powi(X, Y) * X --> powi(X, Y+1)
+define double @powi_fmul_powi_x(double noundef %x) {
+; CHECK-LABEL: @powi_fmul_powi_x(
+; CHECK-NEXT:    [[MUL:%.*]] = call reassoc double @llvm.powi.f64.i32(double [[X:%.*]], i32 4)
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %p1 = tail call reassoc double @llvm.powi.f64.i32(double %x, i32 3)
+  %mul = fmul reassoc double %p1, %x
+  ret double %mul
+}
+
+; Negative test: Multi-use
+define double @powi_fmul_powi_x_multi_use(double noundef %x) {
+; CHECK-LABEL: @powi_fmul_powi_x_multi_use(
+; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 3)
+; CHECK-NEXT:    tail call void @use(double [[P1]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P1]], [[X]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 3)
+  tail call void @use(double %p1)
+  %mul = fmul reassoc double %p1, %x
+  ret double %mul
+}
+
+; Negative test: Miss fmf flag
+define double @powi_fmul_powi_x_missing_reassoc(double noundef %x) {
+; CHECK-LABEL: @powi_fmul_powi_x_missing_reassoc(
+; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 3)
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[P1]], [[X]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 3)
+  %mul = fmul double %p1, %x
+  ret double %mul
+}
+
+; Negative test: overflow
+define double @powi_fmul_powi_x_overflow(double noundef %x) {
+; CHECK-LABEL: @powi_fmul_powi_x_overflow(
+; CHECK-NEXT:    [[P1:%.*]] = tail call double @llvm.powi.f64.i32(double [[X:%.*]], i32 2147483647)
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[P1]], [[X]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %p1 = tail call double @llvm.powi.f64.i32(double %x, i32 2147483647) ; INT_MAX
+  %mul = fmul reassoc double %p1, %x
+  ret double %mul
+}
diff --git a/llvm/test/Transforms/InstCombine/pr63791.ll b/llvm/test/Transforms/InstCombine/pr63791.ll
index 78cc113..73a559f9 100644
--- a/llvm/test/Transforms/InstCombine/pr63791.ll
+++ b/llvm/test/Transforms/InstCombine/pr63791.ll
@@ -15,7 +15,7 @@ define void @y() {
 ; CHECK-NEXT:    store i1 true, ptr poison, align 1
 ; CHECK-NEXT:    br i1 poison, label [[FOR_COND_I]], label [[FOR_COND5_PREHEADER_I]]
 ; CHECK:       for.cond5.preheader.i:
-; CHECK-NEXT:    br i1 false, label [[FOR_INC19_I:%.*]], label [[FOR_COND1_LOOPEXIT_I:%.*]]
+; CHECK-NEXT:    br i1 true, label [[FOR_COND1_LOOPEXIT_I:%.*]], label [[FOR_INC19_I:%.*]]
 ; CHECK:       for.inc19.i:
 ; CHECK-NEXT:    br i1 poison, label [[FOR_INC19_I]], label [[FOR_COND1_LOOPEXIT_I]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll b/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
index 2e1ff0a..461c9b0 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor-cmp.ll
@@ -117,11 +117,9 @@ define i64 @sext_diff_i1_xor_sub_1(i64 %a, i1 %b, i1 %c) {
 define i64 @sext_multi_uses(i64 %a, i1 %b, i64 %x) {
 ; CHECK-LABEL: define i64 @sext_multi_uses(
 ; CHECK-SAME: i64 [[A:%.*]], i1 [[B:%.*]], i64 [[X:%.*]]) {
-; CHECK-NEXT:    [[C:%.*]] = sext i1 [[B]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[A]]
-; CHECK-NEXT:    [[E:%.*]] = select i1 [[B]], i64 [[TMP1]], i64 [[A]]
-; CHECK-NEXT:    [[F:%.*]] = mul i64 [[C]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = add i64 [[F]], [[E]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[X]], [[A]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[B]], i64 [[TMP2]], i64 [[A]]
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %c = sext i1 %b to i64
diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll
index 978f061..6f0ef19 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll
@@ -1,86 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
-	%struct.PerlInterpreter = type { i8 }
+  %struct.PerlInterpreter = type { i8 }
 @PL_sv_count = external global i32		; <ptr> [#uses=2]
 
 define void @perl_destruct(ptr %sv_interp) {
 entry:
-	br i1 false, label %cond_next25, label %cond_true16
+  br i1 false, label %cond_next25, label %cond_true16
 
 cond_true16:		; preds = %entry
-	ret void
+  ret void
 
 cond_next25:		; preds = %entry
-	br i1 false, label %cond_next33, label %cond_true32
+  br i1 false, label %cond_next33, label %cond_true32
 
 cond_true32:		; preds = %cond_next25
-	ret void
+  ret void
 
 cond_next33:		; preds = %cond_next25
-	br i1 false, label %cond_next61, label %cond_true.i46
+  br i1 false, label %cond_next61, label %cond_true.i46
 
 cond_true.i46:		; preds = %cond_next33
-	ret void
+  ret void
 
 cond_next61:		; preds = %cond_next33
-	br i1 false, label %cond_next69, label %cond_true66
+  br i1 false, label %cond_next69, label %cond_true66
 
 cond_true66:		; preds = %cond_next61
-	ret void
+  ret void
 
 cond_next69:		; preds = %cond_next61
-	br i1 false, label %Perl_safefree.exit52, label %cond_true.i50
+  br i1 false, label %Perl_safefree.exit52, label %cond_true.i50
 
 cond_true.i50:		; preds = %cond_next69
-	ret void
+  ret void
 
 Perl_safefree.exit52:		; preds = %cond_next69
-	br i1 false, label %cond_next80, label %cond_true77
+  br i1 false, label %cond_next80, label %cond_true77
 
 cond_true77:		; preds = %Perl_safefree.exit52
-	ret void
+  ret void
 
 cond_next80:		; preds = %Perl_safefree.exit52
-	br i1 false, label %Perl_safefree.exit56, label %cond_true.i54
+  br i1 false, label %Perl_safefree.exit56, label %cond_true.i54
 
 cond_true.i54:		; preds = %cond_next80
-	ret void
+  ret void
 
 Perl_safefree.exit56:		; preds = %cond_next80
-	br i1 false, label %Perl_safefree.exit60, label %cond_true.i58
+  br i1 false, label %Perl_safefree.exit60, label %cond_true.i58
 
 cond_true.i58:		; preds = %Perl_safefree.exit56
-	ret void
+  ret void
 
 Perl_safefree.exit60:		; preds = %Perl_safefree.exit56
-	br i1 false, label %Perl_safefree.exit64, label %cond_true.i62
+  br i1 false, label %Perl_safefree.exit64, label %cond_true.i62
 
 cond_true.i62:		; preds = %Perl_safefree.exit60
-	ret void
+  ret void
 
 Perl_safefree.exit64:		; preds = %Perl_safefree.exit60
-	br i1 false, label %Perl_safefree.exit68, label %cond_true.i66
+  br i1 false, label %Perl_safefree.exit68, label %cond_true.i66
 
 cond_true.i66:		; preds = %Perl_safefree.exit64
-	ret void
+  ret void
 
 Perl_safefree.exit68:		; preds = %Perl_safefree.exit64
-	br i1 false, label %cond_next150, label %cond_true23.i
+  br i1 false, label %cond_next150, label %cond_true23.i
 
 cond_true23.i:		; preds = %Perl_safefree.exit68
-	ret void
+  ret void
 
 cond_next150:		; preds = %Perl_safefree.exit68
-	%tmp16092 = load i32, ptr @PL_sv_count, align 4		; <i32> [#uses=0]
-	br label %cond_next165
+  %tmp16092 = load i32, ptr @PL_sv_count, align 4		; <i32> [#uses=0]
+  br label %cond_next165
 
 bb157:		; preds = %cond_next165
-	%tmp158 = load i32, ptr @PL_sv_count, align 4		; <i32> [#uses=0]
-	br label %cond_next165
+  %tmp158 = load i32, ptr @PL_sv_count, align 4		; <i32> [#uses=0]
+  br label %cond_next165
 
 cond_next165:		; preds = %bb157, %cond_next150
-	br i1 false, label %bb171, label %bb157
+  br i1 false, label %bb171, label %bb157
 
 bb171:		; preds = %cond_next165
-	ret void
+  ret void
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll
index abb6fbe..5202a2b 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll
@@ -1,15 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
-	%struct.INT2 = type { i32, i32 }
+  %struct.INT2 = type { i32, i32 }
 @blkshifts = external global ptr		; <ptr> [#uses=2]
 
 define i32 @xcompact() {
+; CHECK-LABEL: define i32 @xcompact() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store ptr null, ptr @blkshifts, align 4
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    br label [[BB]]
+;
 entry:
-	store ptr null, ptr @blkshifts, align 4
-	br label %bb
+  store ptr null, ptr @blkshifts, align 4
+  br label %bb
 
 bb:		; preds = %bb, %entry
-	%tmp10 = load ptr, ptr @blkshifts, align 4		; <ptr> [#uses=0]
-; CHECK-NOT:  %tmp10
-	br label %bb
+  %tmp10 = load ptr, ptr @blkshifts, align 4		; <ptr> [#uses=0]
+  br label %bb
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll
index 336f390..2ee599c1 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll
@@ -1,15 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
-	%struct.s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
+  %struct.s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
 
 define void @print_arch(ptr %arch_file, i32 %route_type, i64 %det_routing_arch.0.0, i64 %det_routing_arch.0.1, i64 %det_routing_arch.0.2, i64 %det_routing_arch.0.3, i64 %det_routing_arch.0.4, ptr %segment_inf, i64 %timing_inf.0.0, i64 %timing_inf.0.1, i64 %timing_inf.0.2, i64 %timing_inf.0.3, i64 %timing_inf.0.4, i32 %timing_inf.1) {
 entry:
-	br i1 false, label %bb278, label %bb344
+  br i1 false, label %bb278, label %bb344
 
 bb278:		; preds = %bb278, %entry
-	br i1 false, label %bb278, label %bb344
+  br i1 false, label %bb278, label %bb344
 
 bb344:		; preds = %bb278, %entry
-	%tmp38758 = load i16, ptr null, align 2		; <i16> [#uses=0]
-	ret void
+  %tmp38758 = load i16, ptr null, align 2		; <i16> [#uses=0]
+  ret void
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll
index c46f2b7..e7461c2 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll
@@ -1,38 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
-	%struct.TypHeader = type { i32, ptr, [3 x i8], i8 }
+  %struct.TypHeader = type { i32, ptr, [3 x i8], i8 }
 
 define ptr @LtRec(ptr %hdL, ptr %hdR) {
 entry:
-	br i1 false, label %bb556.preheader, label %bb534.preheader
+  br i1 false, label %bb556.preheader, label %bb534.preheader
 
 bb534.preheader:		; preds = %entry
-	ret ptr null
+  ret ptr null
 
 bb556.preheader:		; preds = %entry
-	%tmp56119 = getelementptr %struct.TypHeader, ptr %hdR, i32 0, i32 0		; <ptr> [#uses=1]
-	%tmp56220 = load i32, ptr %tmp56119		; <i32> [#uses=0]
-	br i1 false, label %bb.nph23, label %bb675.preheader
+  %tmp56119 = getelementptr %struct.TypHeader, ptr %hdR, i32 0, i32 0		; <ptr> [#uses=1]
+  %tmp56220 = load i32, ptr %tmp56119		; <i32> [#uses=0]
+  br i1 false, label %bb.nph23, label %bb675.preheader
 
 bb.nph23:		; preds = %bb556.preheader
-	ret ptr null
+  ret ptr null
 
 bb656:		; preds = %bb675.outer, %bb656
-	%tmp678 = load i32, ptr %tmp677		; <i32> [#uses=0]
-	br i1 false, label %bb684, label %bb656
+  %tmp678 = load i32, ptr %tmp677		; <i32> [#uses=0]
+  br i1 false, label %bb684, label %bb656
 
 bb684:		; preds = %bb675.outer, %bb656
-	br i1 false, label %bb924.preheader, label %bb675.outer
+  br i1 false, label %bb924.preheader, label %bb675.outer
 
 bb675.outer:		; preds = %bb675.preheader, %bb684
-	%tmp67812 = load i32, ptr %tmp67711		; <i32> [#uses=0]
-	br i1 false, label %bb684, label %bb656
+  %tmp67812 = load i32, ptr %tmp67711		; <i32> [#uses=0]
+  br i1 false, label %bb684, label %bb656
 
 bb675.preheader:		; preds = %bb556.preheader
-	%tmp67711 = getelementptr %struct.TypHeader, ptr %hdR, i32 0, i32 0		; <ptr> [#uses=1]
-	%tmp677 = getelementptr %struct.TypHeader, ptr %hdR, i32 0, i32 0		; <ptr> [#uses=1]
-	br label %bb675.outer
+  %tmp67711 = getelementptr %struct.TypHeader, ptr %hdR, i32 0, i32 0		; <ptr> [#uses=1]
+  %tmp677 = getelementptr %struct.TypHeader, ptr %hdR, i32 0, i32 0		; <ptr> [#uses=1]
+  br label %bb675.outer
 
 bb924.preheader:		; preds = %bb684
-	ret ptr null
+  ret ptr null
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll b/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll
index 0b0597f..6fafce3 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll
@@ -1,29 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
-	%struct.ggBRDF = type { ptr }
-	%struct.ggBox3 = type { %struct.ggPoint3, %struct.ggPoint3 }
-	%struct.ggMaterialRecord = type { %struct.ggPoint2, %struct.ggBox3, %struct.ggBox3, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggSpectrum, ptr, i32, i32, i32, i32 }
-	%struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 }
-	%struct.ggPoint2 = type { [2 x double] }
-	%struct.ggPoint3 = type { [3 x double] }
-	%struct.ggSpectrum = type { [8 x float] }
-	%struct.mrViewingHitRecord = type { double, %struct.ggPoint3, %struct.ggONB3, %struct.ggPoint2, double, %struct.ggSpectrum, %struct.ggSpectrum, i32, i32, i32, i32 }
-	%struct.mrXEllipticalCylinder = type { %struct.ggBRDF, float, float, float, float, float, float }
+  %struct.ggBRDF = type { ptr }
+  %struct.ggBox3 = type { %struct.ggPoint3, %struct.ggPoint3 }
+  %struct.ggMaterialRecord = type { %struct.ggPoint2, %struct.ggBox3, %struct.ggBox3, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggSpectrum, ptr, i32, i32, i32, i32 }
+  %struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 }
+  %struct.ggPoint2 = type { [2 x double] }
+  %struct.ggPoint3 = type { [3 x double] }
+  %struct.ggSpectrum = type { [8 x float] }
+  %struct.mrViewingHitRecord = type { double, %struct.ggPoint3, %struct.ggONB3, %struct.ggPoint2, double, %struct.ggSpectrum, %struct.ggSpectrum, i32, i32, i32, i32 }
+  %struct.mrXEllipticalCylinder = type { %struct.ggBRDF, float, float, float, float, float, float }
 
 define i32 @_ZNK21mrZEllipticalCylinder10viewingHitERK6ggRay3dddR18mrViewingHitRecordR16ggMaterialRecord(ptr %this, ptr %ray, double %unnamed_arg, double %tmin, double %tmax, ptr %VHR, ptr %unnamed_arg2) {
 entry:
-	%tmp80.i = getelementptr %struct.mrViewingHitRecord, ptr %VHR, i32 0, i32 1, i32 0, i32 0		; <ptr> [#uses=1]
-	store double 0.000000e+00, ptr %tmp80.i
-	br i1 false, label %return, label %cond_next.i
+  %tmp80.i = getelementptr %struct.mrViewingHitRecord, ptr %VHR, i32 0, i32 1, i32 0, i32 0		; <ptr> [#uses=1]
+  store double 0.000000e+00, ptr %tmp80.i
+  br i1 false, label %return, label %cond_next.i
 
 cond_next.i:		; preds = %entry
-	br i1 false, label %return, label %cond_true
+  br i1 false, label %return, label %cond_true
 
 cond_true:		; preds = %cond_next.i
-	%tmp3.i8 = getelementptr %struct.mrViewingHitRecord, ptr %VHR, i32 0, i32 1, i32 0, i32 0		; <ptr> [#uses=1]
-	%tmp46 = load double, ptr %tmp3.i8		; <double> [#uses=0]
-	ret i32 1
+  %tmp3.i8 = getelementptr %struct.mrViewingHitRecord, ptr %VHR, i32 0, i32 1, i32 0, i32 0		; <ptr> [#uses=1]
+  %tmp46 = load double, ptr %tmp3.i8		; <double> [#uses=0]
+  ret i32 1
 
 return:		; preds = %cond_next.i, %entry
-	ret i32 0
+  ret i32 0
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll b/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll
index 8d3bfcd..a64901e 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll
@@ -1,16 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
 @bsLive = external global i32		; <ptr> [#uses=2]
 
 define i32 @bsR(i32 %n) {
 entry:
-	br i1 false, label %cond_next, label %bb19
+  br i1 false, label %cond_next, label %bb19
 
 cond_next:		; preds = %entry
-	store i32 0, ptr @bsLive, align 4
-	br label %bb19
+  store i32 0, ptr @bsLive, align 4
+  br label %bb19
 
 bb19:		; preds = %cond_next, %entry
-	%tmp29 = load i32, ptr @bsLive, align 4		; <i32> [#uses=0]
-	ret i32 0
+  %tmp29 = load i32, ptr @bsLive, align 4		; <i32> [#uses=0]
+  ret i32 0
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll b/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll
index 22d6432..46f9b84 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll
@@ -20,7 +20,7 @@ define i32 @reload(ptr %first, i32 %global, ptr %dumpfile) {
 ; CHECK:       cond_next2943:
 ; CHECK-NEXT:    br i1 false, label [[BB2982_PREHEADER:%.*]], label [[BB2928]]
 ; CHECK:       bb2982.preheader:
-; CHECK-NEXT:    store i8 poison, ptr null
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    ret i32 poison
 ;
 cond_next2835.1:		; preds = %cond_next2861
diff --git a/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll b/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll
index 59da31c..c708460 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll
@@ -1,274 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
-	%"struct.Block::$_16" = type { i32 }
-	%struct.Exp = type { ptr, i32, i32, i32, ptr, ptr, %"struct.Exp::$_10", %"struct.Block::$_16", %"struct.Exp::$_12" }
-	%"struct.Exp::$_10" = type { ptr }
-	%"struct.Exp::$_12" = type { ptr }
-	%struct.Exp_ = type { i32, i32, i32, i32, ptr }
-	%struct.Id = type { ptr, i32, i32, i32, %"struct.Id::$_13" }
-	%"struct.Id::$_13" = type { double }
+  %"struct.Block::$_16" = type { i32 }
+  %struct.Exp = type { ptr, i32, i32, i32, ptr, ptr, %"struct.Exp::$_10", %"struct.Block::$_16", %"struct.Exp::$_12" }
+  %"struct.Exp::$_10" = type { ptr }
+  %"struct.Exp::$_12" = type { ptr }
+  %struct.Exp_ = type { i32, i32, i32, i32, ptr }
+  %struct.Id = type { ptr, i32, i32, i32, %"struct.Id::$_13" }
+  %"struct.Id::$_13" = type { double }
 
 define ptr @_ZN3Exp8toStringEj(ptr %this, i32 %nextpc) {
 entry:
-	switch i32 0, label %bb970 [
-		 i32 1, label %bb
-		 i32 2, label %bb39
-		 i32 3, label %bb195
-		 i32 4, label %bb270
-		 i32 5, label %bb418
-		 i32 6, label %bb633
-		 i32 7, label %bb810
-		 i32 8, label %bb882
-		 i32 9, label %bb925
-	]
+  switch i32 0, label %bb970 [
+  i32 1, label %bb
+  i32 2, label %bb39
+  i32 3, label %bb195
+  i32 4, label %bb270
+  i32 5, label %bb418
+  i32 6, label %bb633
+  i32 7, label %bb810
+  i32 8, label %bb882
+  i32 9, label %bb925
+  ]
 
 bb:		; preds = %entry
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb39:		; preds = %entry
-	br i1 false, label %cond_true, label %cond_false132
+  br i1 false, label %cond_true, label %cond_false132
 
 cond_true:		; preds = %bb39
-	br i1 false, label %cond_true73, label %cond_false
+  br i1 false, label %cond_true73, label %cond_false
 
 cond_true73:		; preds = %cond_true
-	br i1 false, label %cond_true108, label %cond_next
+  br i1 false, label %cond_true108, label %cond_next
 
 cond_true108:		; preds = %cond_true73
-	br label %cond_next
+  br label %cond_next
 
 cond_next:		; preds = %cond_true108, %cond_true73
-	br label %cond_next131
+  br label %cond_next131
 
 cond_false:		; preds = %cond_true
-	br label %cond_next131
+  br label %cond_next131
 
 cond_next131:		; preds = %cond_false, %cond_next
-	br label %cond_next141
+  br label %cond_next141
 
 cond_false132:		; preds = %bb39
-	br label %cond_next141
+  br label %cond_next141
 
 cond_next141:		; preds = %cond_false132, %cond_next131
-	br i1 false, label %cond_true169, label %cond_false175
+  br i1 false, label %cond_true169, label %cond_false175
 
 cond_true169:		; preds = %cond_next141
-	br label %cond_next181
+  br label %cond_next181
 
 cond_false175:		; preds = %cond_next141
-	br label %cond_next181
+  br label %cond_next181
 
 cond_next181:		; preds = %cond_false175, %cond_true169
-	br i1 false, label %cond_true189, label %cond_next191
+  br i1 false, label %cond_true189, label %cond_next191
 
 cond_true189:		; preds = %cond_next181
-	br label %cond_next191
+  br label %cond_next191
 
 cond_next191:		; preds = %cond_true189, %cond_next181
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb195:		; preds = %entry
-	br i1 false, label %cond_true248, label %cond_false250
+  br i1 false, label %cond_true248, label %cond_false250
 
 cond_true248:		; preds = %bb195
-	br label %cond_next252
+  br label %cond_next252
 
 cond_false250:		; preds = %bb195
-	br label %cond_next252
+  br label %cond_next252
 
 cond_next252:		; preds = %cond_false250, %cond_true248
-	br i1 false, label %cond_true265, label %cond_next267
+  br i1 false, label %cond_true265, label %cond_next267
 
 cond_true265:		; preds = %cond_next252
-	br label %cond_next267
+  br label %cond_next267
 
 cond_next267:		; preds = %cond_true265, %cond_next252
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb270:		; preds = %entry
-	br i1 false, label %cond_true338, label %cond_false340
+  br i1 false, label %cond_true338, label %cond_false340
 
 cond_true338:		; preds = %bb270
-	br label %cond_next342
+  br label %cond_next342
 
 cond_false340:		; preds = %bb270
-	br label %cond_next342
+  br label %cond_next342
 
 cond_next342:		; preds = %cond_false340, %cond_true338
-	br i1 false, label %cond_true362, label %cond_false364
+  br i1 false, label %cond_true362, label %cond_false364
 
 cond_true362:		; preds = %cond_next342
-	br label %cond_next366
+  br label %cond_next366
 
 cond_false364:		; preds = %cond_next342
-	br label %cond_next366
+  br label %cond_next366
 
 cond_next366:		; preds = %cond_false364, %cond_true362
-	br i1 false, label %cond_true393, label %cond_next395
+  br i1 false, label %cond_true393, label %cond_next395
 
 cond_true393:		; preds = %cond_next366
-	br label %cond_next395
+  br label %cond_next395
 
 cond_next395:		; preds = %cond_true393, %cond_next366
-	br i1 false, label %cond_true406, label %cond_next408
+  br i1 false, label %cond_true406, label %cond_next408
 
 cond_true406:		; preds = %cond_next395
-	br label %cond_next408
+  br label %cond_next408
 
 cond_next408:		; preds = %cond_true406, %cond_next395
-	br i1 false, label %cond_true413, label %cond_next415
+  br i1 false, label %cond_true413, label %cond_next415
 
 cond_true413:		; preds = %cond_next408
-	br label %cond_next415
+  br label %cond_next415
 
 cond_next415:		; preds = %cond_true413, %cond_next408
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb418:		; preds = %entry
-	br i1 false, label %cond_true512, label %cond_false514
+  br i1 false, label %cond_true512, label %cond_false514
 
 cond_true512:		; preds = %bb418
-	br label %cond_next516
+  br label %cond_next516
 
 cond_false514:		; preds = %bb418
-	br label %cond_next516
+  br label %cond_next516
 
 cond_next516:		; preds = %cond_false514, %cond_true512
-	br i1 false, label %cond_true536, label %cond_false538
+  br i1 false, label %cond_true536, label %cond_false538
 
 cond_true536:		; preds = %cond_next516
-	br label %cond_next540
+  br label %cond_next540
 
 cond_false538:		; preds = %cond_next516
-	br label %cond_next540
+  br label %cond_next540
 
 cond_next540:		; preds = %cond_false538, %cond_true536
-	br i1 false, label %cond_true560, label %cond_false562
+  br i1 false, label %cond_true560, label %cond_false562
 
 cond_true560:		; preds = %cond_next540
-	br label %cond_next564
+  br label %cond_next564
 
 cond_false562:		; preds = %cond_next540
-	br label %cond_next564
+  br label %cond_next564
 
 cond_next564:		; preds = %cond_false562, %cond_true560
-	br i1 false, label %cond_true597, label %cond_next599
+  br i1 false, label %cond_true597, label %cond_next599
 
 cond_true597:		; preds = %cond_next564
-	br label %cond_next599
+  br label %cond_next599
 
 cond_next599:		; preds = %cond_true597, %cond_next564
-	br i1 false, label %cond_true614, label %cond_next616
+  br i1 false, label %cond_true614, label %cond_next616
 
 cond_true614:		; preds = %cond_next599
-	br label %cond_next616
+  br label %cond_next616
 
 cond_next616:		; preds = %cond_true614, %cond_next599
-	br i1 false, label %cond_true621, label %cond_next623
+  br i1 false, label %cond_true621, label %cond_next623
 
 cond_true621:		; preds = %cond_next616
-	br label %cond_next623
+  br label %cond_next623
 
 cond_next623:		; preds = %cond_true621, %cond_next616
-	br i1 false, label %cond_true628, label %cond_next630
+  br i1 false, label %cond_true628, label %cond_next630
 
 cond_true628:		; preds = %cond_next623
-	br label %cond_next630
+  br label %cond_next630
 
 cond_next630:		; preds = %cond_true628, %cond_next623
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb633:		; preds = %entry
-	br i1 false, label %cond_true667, label %cond_next669
+  br i1 false, label %cond_true667, label %cond_next669
 
 cond_true667:		; preds = %bb633
-	br label %cond_next669
+  br label %cond_next669
 
 cond_next669:		; preds = %cond_true667, %bb633
-	br i1 false, label %cond_true678, label %cond_next791
+  br i1 false, label %cond_true678, label %cond_next791
 
 cond_true678:		; preds = %cond_next669
-	br label %bb735
+  br label %bb735
 
 bb679:		; preds = %bb735
-	br i1 false, label %cond_true729, label %cond_next731
+  br i1 false, label %cond_true729, label %cond_next731
 
 cond_true729:		; preds = %bb679
-	br label %cond_next731
+  br label %cond_next731
 
 cond_next731:		; preds = %cond_true729, %bb679
-	br label %bb735
+  br label %bb735
 
 bb735:		; preds = %cond_next731, %cond_true678
-	br i1 false, label %bb679, label %bb743
+  br i1 false, label %bb679, label %bb743
 
 bb743:		; preds = %bb735
-	br i1 false, label %cond_true788, label %cond_next790
+  br i1 false, label %cond_true788, label %cond_next790
 
 cond_true788:		; preds = %bb743
-	br label %cond_next790
+  br label %cond_next790
 
 cond_next790:		; preds = %cond_true788, %bb743
-	br label %cond_next791
+  br label %cond_next791
 
 cond_next791:		; preds = %cond_next790, %cond_next669
-	br i1 false, label %cond_true805, label %cond_next807
+  br i1 false, label %cond_true805, label %cond_next807
 
 cond_true805:		; preds = %cond_next791
-	br label %cond_next807
+  br label %cond_next807
 
 cond_next807:		; preds = %cond_true805, %cond_next791
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb810:		; preds = %entry
-	br i1 false, label %cond_true870, label %cond_next872
+  br i1 false, label %cond_true870, label %cond_next872
 
 cond_true870:		; preds = %bb810
-	br label %cond_next872
+  br label %cond_next872
 
 cond_next872:		; preds = %cond_true870, %bb810
-	br i1 false, label %cond_true877, label %cond_next879
+  br i1 false, label %cond_true877, label %cond_next879
 
 cond_true877:		; preds = %cond_next872
-	br label %cond_next879
+  br label %cond_next879
 
 cond_next879:		; preds = %cond_true877, %cond_next872
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb882:		; preds = %entry
-	br i1 false, label %cond_true920, label %cond_next922
+  br i1 false, label %cond_true920, label %cond_next922
 
 cond_true920:		; preds = %bb882
-	br label %cond_next922
+  br label %cond_next922
 
 cond_next922:		; preds = %cond_true920, %bb882
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb925:		; preds = %entry
-	br i1 false, label %cond_true965, label %cond_next967
+  br i1 false, label %cond_true965, label %cond_next967
 
 cond_true965:		; preds = %bb925
-	br label %cond_next967
+  br label %cond_next967
 
 cond_next967:		; preds = %cond_true965, %bb925
-	store ptr null, ptr null
-	br label %return
+  store ptr null, ptr null
+  br label %return
 
 bb970:		; preds = %entry
-	unreachable
-		; No predecessors!
-	store ptr null, ptr null
-	br label %return
+  unreachable
+  ; No predecessors!
+  store ptr null, ptr null
+  br label %return
 
 return:		; preds = %0, %cond_next967, %cond_next922, %cond_next879, %cond_next807, %cond_next630, %cond_next415, %cond_next267, %cond_next191, %bb
-	%retval980 = load ptr, ptr null		; <ptr> [#uses=1]
-	ret ptr %retval980
+  %retval980 = load ptr, ptr null		; <ptr> [#uses=1]
+  ret ptr %retval980
 }
diff --git a/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll b/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll
index 934fffc..5411bcc 100644
--- a/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll
+++ b/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll
@@ -1,23 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 @img_width = external global i16		; <ptr> [#uses=2]
 
 define i32 @smpUMHEXBipredIntegerPelBlockMotionSearch(ptr %cur_pic, i16 signext  %ref, i32 %list, i32 %pic_pix_x, i32 %pic_pix_y, i32 %blocktype, i16 signext  %pred_mv_x1, i16 signext  %pred_mv_y1, i16 signext  %pred_mv_x2, i16 signext  %pred_mv_y2, ptr %mv_x, ptr %mv_y, ptr %s_mv_x, ptr %s_mv_y, i32 %search_range, i32 %min_mcost, i32 %lambda_factor) {
+; CHECK-LABEL: define i32 @smpUMHEXBipredIntegerPelBlockMotionSearch(
+; CHECK-SAME: ptr [[CUR_PIC:%.*]], i16 signext [[REF:%.*]], i32 [[LIST:%.*]], i32 [[PIC_PIX_X:%.*]], i32 [[PIC_PIX_Y:%.*]], i32 [[BLOCKTYPE:%.*]], i16 signext [[PRED_MV_X1:%.*]], i16 signext [[PRED_MV_Y1:%.*]], i16 signext [[PRED_MV_X2:%.*]], i16 signext [[PRED_MV_Y2:%.*]], ptr [[MV_X:%.*]], ptr [[MV_Y:%.*]], ptr [[S_MV_X:%.*]], ptr [[S_MV_Y:%.*]], i32 [[SEARCH_RANGE:%.*]], i32 [[MIN_MCOST:%.*]], i32 [[LAMBDA_FACTOR:%.*]]) {
+; CHECK-NEXT:  cond_next143:
+; CHECK-NEXT:    store i16 0, ptr @img_width, align 2
+; CHECK-NEXT:    br i1 false, label [[COND_NEXT449:%.*]], label [[COND_FALSE434:%.*]]
+; CHECK:       cond_false434:
+; CHECK-NEXT:    br label [[COND_NEXT449]]
+; CHECK:       cond_next449:
+; CHECK-NEXT:    br i1 false, label [[COND_NEXT698:%.*]], label [[COND_FALSE470:%.*]]
+; CHECK:       cond_false470:
+; CHECK-NEXT:    br label [[COND_NEXT698]]
+; CHECK:       cond_next698:
+; CHECK-NEXT:    ret i32 0
+;
 cond_next143:		; preds = %entry
-	store i16 0, ptr @img_width, align 2
-	br i1 false, label %cond_next449, label %cond_false434
+  store i16 0, ptr @img_width, align 2
+  br i1 false, label %cond_next449, label %cond_false434
 
 cond_false434:		; preds = %cond_true415
-	br label %cond_next449
+  br label %cond_next449
 
 cond_next449:		; preds = %cond_false434, %cond_true415
-	br i1 false, label %cond_next698, label %cond_false470
+  br i1 false, label %cond_next698, label %cond_false470
 
 cond_false470:		; preds = %cond_next449
-	br label %cond_next698
+  br label %cond_next698
 
 cond_next698:		; preds = %cond_true492
-	%tmp701 = load i16, ptr @img_width, align 2		; <i16> [#uses=0]
-; CHECK-NOT: %tmp701 =
-	ret i32 0
+  %tmp701 = load i16, ptr @img_width, align 2		; <i16> [#uses=0]
+  ret i32 0
 }
diff --git a/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll b/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll
index b2440fb..49d6de7 100644
--- a/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll
+++ b/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll
@@ -1,22 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn
 ; PR2032
 
 define i32 @sscal(i32 %n, double %sa1, ptr %sx, i32 %incx) {
 entry:
-	%sx_addr = alloca ptr		; <ptr> [#uses=3]
-	store ptr %sx, ptr %sx_addr, align 4
-	br label %bb33
+  %sx_addr = alloca ptr		; <ptr> [#uses=3]
+  store ptr %sx, ptr %sx_addr, align 4
+  br label %bb33
 
 bb:		; preds = %bb33
-	%tmp27 = load ptr, ptr %sx_addr, align 4		; <ptr> [#uses=1]
-	store float 0.000000e+00, ptr %tmp27, align 4
-	store ptr null, ptr %sx_addr, align 4
-	br label %bb33
+  %tmp27 = load ptr, ptr %sx_addr, align 4		; <ptr> [#uses=1]
+  store float 0.000000e+00, ptr %tmp27, align 4
+  store ptr null, ptr %sx_addr, align 4
+  br label %bb33
 
 bb33:		; preds = %bb, %entry
-	br i1 false, label %bb, label %return
+  br i1 false, label %bb, label %return
 
 return:		; preds = %bb33
-	%retval59 = load i32, ptr null, align 4		; <i32> [#uses=1]
-	ret i32 %retval59
+  %retval59 = load i32, ptr null, align 4		; <i32> [#uses=1]
+  ret i32 %retval59
 }
diff --git a/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll b/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll
index 0c1891e..cf591d7 100644
--- a/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll
+++ b/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll
@@ -1,36 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 ; PR2503
 
 @g_3 = external global i8		; <ptr> [#uses=2]
 
 define i8 @func_1(i32 %x, i32 %y) nounwind  {
+; CHECK-LABEL: define i8 @func_1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IFELSE:%.*]], label [[IFTHEN:%.*]]
+; CHECK:       ifthen:
+; CHECK-NEXT:    br label [[IFEND:%.*]]
+; CHECK:       ifelse:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr @g_3, align 1
+; CHECK-NEXT:    store i8 [[TMP3]], ptr [[A]], align 1
+; CHECK-NEXT:    br label [[AFTERFOR:%.*]]
+; CHECK:       forcond:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br i1 false, label [[AFTERFOR]], label [[FORBODY:%.*]]
+; CHECK:       forbody:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br label [[FORINC:%.*]]
+; CHECK:       forinc:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br label [[FORCOND:%.*]]
+; CHECK:       afterfor:
+; CHECK-NEXT:    ret i8 [[TMP3]]
+; CHECK:       ifend:
+; CHECK-NEXT:    ret i8 0
+;
 entry:
   %A = alloca i8
-    %cmp = icmp eq i32 %x, %y
-	br i1 %cmp, label %ifelse, label %ifthen
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %ifelse, label %ifthen
 
 ifthen:		; preds = %entry
-	br label %ifend
+  br label %ifend
 
 ifelse:		; preds = %entry
-	%tmp3 = load i8, ptr @g_3		; <i8> [#uses=0]
-        store i8 %tmp3, ptr %A
-	br label %afterfor
+  %tmp3 = load i8, ptr @g_3		; <i8> [#uses=0]
+  store i8 %tmp3, ptr %A
+  br label %afterfor
 
 forcond:		; preds = %forinc
-	br i1 false, label %afterfor, label %forbody
+  br i1 false, label %afterfor, label %forbody
 
 forbody:		; preds = %forcond
-	br label %forinc
+  br label %forinc
 
 forinc:		; preds = %forbody
-	br label %forcond
+  br label %forcond
 
 afterfor:		; preds = %forcond, %forcond.thread
-	%tmp10 = load i8, ptr @g_3		; <i8> [#uses=0]
-	ret i8 %tmp10
-; CHECK: ret i8 %tmp3
+  %tmp10 = load i8, ptr @g_3		; <i8> [#uses=0]
+  ret i8 %tmp10
 
 ifend:		; preds = %afterfor, %ifthen
-	ret i8 0
+  ret i8 0
 }
diff --git a/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll b/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll
index a2e252d..95cb229 100644
--- a/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll
+++ b/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll
@@ -1,38 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
-	%struct.anon = type { ptr, i32 }
-	%struct.d_print_info = type { i32, ptr, i32, i32, ptr, ptr, i32 }
-	%struct.d_print_mod = type { ptr, ptr, i32, ptr }
-	%struct.d_print_template = type { ptr, ptr }
-	%struct.demangle_component = type { i32, { %struct.anon } }
+  %struct.anon = type { ptr, i32 }
+  %struct.d_print_info = type { i32, ptr, i32, i32, ptr, ptr, i32 }
+  %struct.d_print_mod = type { ptr, ptr, i32, ptr }
+  %struct.d_print_template = type { ptr, ptr }
+  %struct.demangle_component = type { i32, { %struct.anon } }
 
 define void @d_print_mod_list(ptr %dpi, ptr %mods, i32 %suffix) nounwind {
+; CHECK-LABEL: define void @d_print_mod_list(
+; CHECK-SAME: ptr [[DPI:%.*]], ptr [[MODS:%.*]], i32 [[SUFFIX:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_D_PRINT_INFO:%.*]], ptr [[DPI]], i32 0, i32 1
+; CHECK-NEXT:    br i1 false, label [[RETURN:%.*]], label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    br label [[BB21:%.*]]
+; CHECK:       bb21:
+; CHECK-NEXT:    br label [[BB21]]
+; CHECK:       return:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    ret void
+;
 entry:
-	%0 = getelementptr %struct.d_print_info, ptr %dpi, i32 0, i32 1		; <ptr> [#uses=1]
-	br i1 false, label %return, label %bb
+  %0 = getelementptr %struct.d_print_info, ptr %dpi, i32 0, i32 1		; <ptr> [#uses=1]
+  br i1 false, label %return, label %bb
 
 bb:		; preds = %entry
-	%1 = load ptr, ptr %0, align 4		; <ptr> [#uses=0]
-	%2 = getelementptr %struct.d_print_info, ptr %dpi, i32 0, i32 1		; <ptr> [#uses=0]
-	br label %bb21
+  %1 = load ptr, ptr %0, align 4		; <ptr> [#uses=0]
+  %2 = getelementptr %struct.d_print_info, ptr %dpi, i32 0, i32 1		; <ptr> [#uses=0]
+  br label %bb21
 
 bb21:		; preds = %bb21, %bb
-	br label %bb21
+  br label %bb21
 
 return:		; preds = %entry
-	ret void
+  ret void
 }
 
-; CHECK: define void @d_print_mod_list(ptr %dpi, ptr %mods, i32 %suffix) #0 {
-; CHECK: entry:
-; CHECK:   %0 = getelementptr %struct.d_print_info, ptr %dpi, i32 0, i32 1
-; CHECK:   br i1 false, label %return, label %bb
-; CHECK: bb:
-; CHECK:   br label %bb21
-; CHECK: bb21:
-; CHECK:   br label %bb21
-; CHECK: return:
-; CHECK:   ret void
-; CHECK: }
diff --git a/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll b/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll
index bb51f72..3df35a1 100644
--- a/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll
+++ b/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll
@@ -1,35 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
 define i32 @main(i32 %argc, ptr %argv) nounwind {
 entry:
-	br label %bb84
+  br label %bb84
 
 bb41:		; preds = %bb82
-	%tmp = load i8, ptr %opt.0, align 1		; <i8> [#uses=0]
-	%tmp1 = getelementptr i8, ptr %opt.0, i32 1		; <ptr> [#uses=2]
-	switch i32 0, label %bb81 [
-		i32 102, label %bb82
-		i32 110, label %bb79
-		i32 118, label %bb80
-	]
+  %tmp = load i8, ptr %opt.0, align 1		; <i8> [#uses=0]
+  %tmp1 = getelementptr i8, ptr %opt.0, i32 1		; <ptr> [#uses=2]
+  switch i32 0, label %bb81 [
+  i32 102, label %bb82
+  i32 110, label %bb79
+  i32 118, label %bb80
+  ]
 
 bb79:		; preds = %bb41
-	br label %bb82
+  br label %bb82
 
 bb80:		; preds = %bb41
-	ret i32 0
+  ret i32 0
 
 bb81:		; preds = %bb41
-	ret i32 1
+  ret i32 1
 
 bb82:		; preds = %bb84, %bb79, %bb41
-	%opt.0 = phi ptr [ %tmp3, %bb84 ], [ %tmp1, %bb79 ], [ %tmp1, %bb41 ]		; <ptr> [#uses=3]
-	%tmp2 = load i8, ptr %opt.0, align 1		; <i8> [#uses=0]
-	br i1 false, label %bb84, label %bb41
+  %opt.0 = phi ptr [ %tmp3, %bb84 ], [ %tmp1, %bb79 ], [ %tmp1, %bb41 ]		; <ptr> [#uses=3]
+  %tmp2 = load i8, ptr %opt.0, align 1		; <i8> [#uses=0]
+  br i1 false, label %bb84, label %bb41
 
 bb84:		; preds = %bb82, %entry
-	%tmp3 = getelementptr i8, ptr null, i32 1		; <ptr> [#uses=1]
-	br label %bb82
+  %tmp3 = getelementptr i8, ptr null, i32 1		; <ptr> [#uses=1]
+  br label %bb82
 }
diff --git a/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll b/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll
index 38d1240..8213556 100644
--- a/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll
+++ b/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
@@ -5,14 +6,14 @@ target triple = "i386-apple-darwin7"
 
 define i32 @Quiesce(i32 %alpha, i32 %beta, i32 %wtm, i32 %ply) nounwind {
 entry:
-	br label %bb22
+  br label %bb22
 
 bb22:		; preds = %bb23, %bb22, %entry
-	br i1 false, label %bb23, label %bb22
+  br i1 false, label %bb23, label %bb22
 
 bb23:		; preds = %bb23, %bb22
-	%sortv.233 = phi ptr [ @sort_value, %bb22 ], [ %sortv.2, %bb23 ]		; <ptr> [#uses=1]
-	%0 = load i32, ptr %sortv.233, align 4		; <i32> [#uses=0]
-	%sortv.2 = getelementptr [256 x i32], ptr @sort_value, i32 0, i32 0		; <ptr> [#uses=1]
-	br i1 false, label %bb23, label %bb22
+  %sortv.233 = phi ptr [ @sort_value, %bb22 ], [ %sortv.2, %bb23 ]		; <ptr> [#uses=1]
+  %0 = load i32, ptr %sortv.233, align 4		; <i32> [#uses=0]
+  %sortv.2 = getelementptr [256 x i32], ptr @sort_value, i32 0, i32 0		; <ptr> [#uses=1]
+  br i1 false, label %bb23, label %bb22
 }
diff --git a/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll b/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll
index 0b1761d..ff9ecce 100644
--- a/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll
+++ b/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll
@@ -1,28 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 ; Cached results must be added to and verified against the visited sets.
 ; PR3217
 
 define fastcc void @gen_field_die(ptr %decl) nounwind {
 entry:
-	br i1 false, label %bb203, label %bb202
+  br i1 false, label %bb203, label %bb202
 
 bb202:		; preds = %entry
-	unreachable
+  unreachable
 
 bb203:		; preds = %entry
-	%tmp = getelementptr i32, ptr %decl, i32 1		; <ptr> [#uses=1]
-	%tmp1 = load i32, ptr %tmp, align 4		; <i32> [#uses=0]
-	br i1 false, label %bb207, label %bb204
+  %tmp = getelementptr i32, ptr %decl, i32 1		; <ptr> [#uses=1]
+  %tmp1 = load i32, ptr %tmp, align 4		; <i32> [#uses=0]
+  br i1 false, label %bb207, label %bb204
 
 bb204:		; preds = %bb203
-	%tmp2 = getelementptr i32, ptr %decl, i32 1		; <ptr> [#uses=1]
-	br label %bb208
+  %tmp2 = getelementptr i32, ptr %decl, i32 1		; <ptr> [#uses=1]
+  br label %bb208
 
 bb207:		; preds = %bb203
-	br label %bb208
+  br label %bb208
 
 bb208:		; preds = %bb207, %bb204
-	%iftmp.1374.0.in = phi ptr [ null, %bb207 ], [ %tmp2, %bb204 ]		; <ptr> [#uses=1]
-	%iftmp.1374.0 = load i32, ptr %iftmp.1374.0.in		; <i32> [#uses=0]
-	unreachable
+  %iftmp.1374.0.in = phi ptr [ null, %bb207 ], [ %tmp2, %bb204 ]		; <ptr> [#uses=1]
+  %iftmp.1374.0 = load i32, ptr %iftmp.1374.0.in		; <i32> [#uses=0]
+  unreachable
 }
diff --git a/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll b/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll
index 8631cbd..1f0a32a 100644
--- a/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll
+++ b/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll
@@ -1,55 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 ; PR3358
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
-	%struct.re_pattern_buffer = type { ptr, i64, i64, i64, ptr, ptr, i64, i8 }
-	%struct.re_registers = type { i32, ptr, ptr }
+  %struct.re_pattern_buffer = type { ptr, i64, i64, i64, ptr, ptr, i64, i8 }
+  %struct.re_registers = type { i32, ptr, ptr }
 
 define fastcc i32 @byte_re_match_2_internal(ptr nocapture %bufp, ptr %string1, i32 %size1, ptr %string2, i32 %size2, i32 %pos, ptr %regs, i32 %stop) nounwind {
 entry:
-	br label %bb159
+  br label %bb159
 
 succeed_label:		; preds = %bb159
-	ret i32 0
+  ret i32 0
 
 bb159:		; preds = %bb664, %bb554, %bb159, %bb159, %bb159, %entry
-	%d.0 = phi ptr [ null, %entry ], [ %d.0, %bb159 ], [ %d.0, %bb554 ], [ %d.0, %bb159 ], [ %d.0, %bb159 ], [ %d.12, %bb664 ]		; <ptr> [#uses=5]
-	switch i32 0, label %bb661 [
-		i32 0, label %bb159
-		i32 1, label %succeed_label
-		i32 13, label %bb159
-		i32 14, label %bb159
-		i32 16, label %bb411
-		i32 24, label %bb622
-		i32 28, label %bb543
-	]
+  %d.0 = phi ptr [ null, %entry ], [ %d.0, %bb159 ], [ %d.0, %bb554 ], [ %d.0, %bb159 ], [ %d.0, %bb159 ], [ %d.12, %bb664 ]		; <ptr> [#uses=5]
+  switch i32 0, label %bb661 [
+  i32 0, label %bb159
+  i32 1, label %succeed_label
+  i32 13, label %bb159
+  i32 14, label %bb159
+  i32 16, label %bb411
+  i32 24, label %bb622
+  i32 28, label %bb543
+  ]
 
 bb411:		; preds = %bb411, %bb159
-	br label %bb411
+  br label %bb411
 
 bb543:		; preds = %bb159
-	br i1 false, label %bb549, label %bb550
+  br i1 false, label %bb549, label %bb550
 
 bb549:		; preds = %bb543
-	br label %bb554
+  br label %bb554
 
 bb550:		; preds = %bb543
-	br i1 false, label %bb554, label %bb552
+  br i1 false, label %bb554, label %bb552
 
 bb552:		; preds = %bb550
-	%0 = load i8, ptr %d.0, align 8		; <i8> [#uses=0]
-	br label %bb554
+  %0 = load i8, ptr %d.0, align 8		; <i8> [#uses=0]
+  br label %bb554
 
 bb554:		; preds = %bb552, %bb550, %bb549
-	br i1 false, label %bb159, label %bb661
+  br i1 false, label %bb159, label %bb661
 
 bb622:		; preds = %bb622, %bb159
-	br label %bb622
+  br label %bb622
 
 bb661:		; preds = %bb554, %bb159
-	%d.12 = select i1 false, ptr null, ptr null		; <ptr> [#uses=1]
-	br label %bb664
+  %d.12 = select i1 false, ptr null, ptr null		; <ptr> [#uses=1]
+  br label %bb664
 
 bb664:		; preds = %bb664, %bb661
-	br i1 false, label %bb159, label %bb664
+  br i1 false, label %bb159, label %bb664
 }
diff --git a/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll b/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll
index d8871700d..25f73dc 100644
--- a/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll
+++ b/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll
@@ -1,100 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
-	%struct..4sPragmaType = type { ptr, i32 }
-	%struct.AggInfo = type { i8, i8, i32, ptr, i32, ptr, i32, i32, i32, ptr, i32, i32 }
-	%struct.AggInfo_col = type { ptr, i32, i32, i32, i32, ptr }
-	%struct.AggInfo_func = type { ptr, ptr, i32, i32 }
-	%struct.AuxData = type { ptr, ptr }
-	%struct.Bitvec = type { i32, i32, i32, { [125 x i32] } }
-	%struct.BtCursor = type { ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, %struct.CellInfo, i8, i8, ptr, i64, i32, i8, ptr }
-	%struct.BtLock = type { ptr, i32, i8, ptr }
-	%struct.BtShared = type { ptr, ptr, ptr, ptr, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, ptr, ptr, ptr, %struct.BusyHandler, i32, ptr, ptr, ptr }
-	%struct.Btree = type { ptr, ptr, i8, i8, i8, i32, ptr, ptr }
-	%struct.BtreeMutexArray = type { i32, [11 x ptr] }
-	%struct.BusyHandler = type { ptr, ptr, i32 }
-	%struct.CellInfo = type { ptr, i64, i32, i32, i16, i16, i16, i16 }
-	%struct.CollSeq = type { ptr, i8, i8, ptr, ptr, ptr }
-	%struct.Column = type { ptr, ptr, ptr, ptr, i8, i8, i8, i8 }
-	%struct.Context = type { i64, i32, %struct.Fifo }
-	%struct.CountCtx = type { i64 }
-	%struct.Cursor = type { ptr, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, ptr, i32, ptr, i64, ptr, ptr, i32, i64, ptr, ptr, i32, i32, ptr, ptr, ptr }
-	%struct.Db = type { ptr, ptr, i8, i8, ptr, ptr, ptr }
-	%struct.Expr = type { i8, i8, i16, ptr, ptr, ptr, ptr, %struct..4sPragmaType, %struct..4sPragmaType, i32, i32, ptr, i32, i32, ptr, ptr, i32 }
-	%struct.ExprList = type { i32, i32, i32, ptr }
-	%struct.ExprList_item = type { ptr, ptr, i8, i8, i8 }
-	%struct.FKey = type { ptr, ptr, ptr, ptr, i32, ptr, i8, i8, i8, i8 }
-	%struct.Fifo = type { i32, ptr, ptr }
-	%struct.FifoPage = type { i32, i32, i32, ptr, [1 x i64] }
-	%struct.FuncDef = type { i16, i8, i8, i8, ptr, ptr, ptr, ptr, ptr, [1 x i8] }
-	%struct.Hash = type { i8, i8, i32, i32, ptr, ptr }
-	%struct.HashElem = type { ptr, ptr, ptr, ptr, i32 }
-	%struct.IdList = type { ptr, i32, i32 }
-	%struct.Index = type { ptr, i32, ptr, ptr, ptr, i32, i8, i8, ptr, ptr, ptr, ptr, ptr }
-	%struct.KeyInfo = type { ptr, i8, i8, i8, i32, ptr, [1 x ptr] }
-	%struct.Mem = type { %struct.CountCtx, double, ptr, ptr, i32, i16, i8, i8, ptr }
-	%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], ptr, ptr, ptr, i32, ptr }
-	%struct.Module = type { ptr, ptr, ptr, ptr }
-	%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, { i32 } }
-	%struct.Pager = type { ptr, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, %struct.PagerLruList, ptr, ptr, ptr, i64, i64, i64, i64, i64, i32, ptr, ptr, i32, ptr, ptr, [16 x i8] }
-	%struct.PagerLruLink = type { ptr, ptr }
-	%struct.PagerLruList = type { ptr, ptr, ptr }
-	%struct.Parse = type { ptr, i32, ptr, ptr, i8, i8, i8, i8, i8, i8, i8, [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x i32], i32, ptr, i32, i32, i32, i32, i32, ptr, i8, %struct..4sPragmaType, %struct..4sPragmaType, %struct..4sPragmaType, ptr, ptr, ptr, ptr, ptr, ptr, %struct..4sPragmaType, i8, ptr, i32 }
-	%struct.PgHdr = type { ptr, i32, ptr, ptr, %struct.PagerLruLink, ptr, i8, i8, i8, i8, i8, i16, ptr, ptr, ptr }
-	%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, ptr, i8, i8, i16, i32, ptr }
-	%struct.Select = type { ptr, i8, i8, i8, i8, i8, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, [3 x i32] }
-	%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
-	%struct.SrcList_item = type { ptr, ptr, ptr, ptr, ptr, i8, i8, i32, ptr, ptr, i64 }
-	%struct.Table = type { ptr, i32, ptr, i32, ptr, i32, ptr, i32, ptr, ptr, ptr, ptr, i32, i8, i8, i8, i8, i8, i8, i8, ptr, ptr, i32, ptr, ptr }
-	%struct.TableLock = type { i32, i32, i8, ptr }
-	%struct.Trigger = type { ptr, ptr, i8, i8, ptr, ptr, %struct..4sPragmaType, ptr, ptr, ptr, ptr }
-	%struct.TriggerStack = type { ptr, i32, i32, i32, i32, i32, i32, ptr, ptr }
-	%struct.TriggerStep = type { i32, i32, ptr, ptr, %struct..4sPragmaType, ptr, ptr, ptr, ptr, ptr }
-	%struct.Vdbe = type { ptr, ptr, ptr, i32, i32, ptr, i32, i32, ptr, ptr, ptr, i32, ptr, i32, ptr, ptr, i32, i32, i32, ptr, i32, i32, %struct.Fifo, i32, i32, ptr, i32, i32, i32, i32, i32, [25 x i32], i32, i32, ptr, ptr, ptr, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, ptr, i32 }
-	%struct.VdbeFunc = type { ptr, i32, [1 x %struct.AuxData] }
-	%struct._OvflCell = type { ptr, i16 }
-	%struct._ht = type { i32, ptr }
-	%struct.anon = type { double }
-	%struct.sColMap = type { i32, ptr }
-	%struct.sqlite3 = type { ptr, i32, ptr, i32, i32, i32, i32, i8, i8, i8, i8, i32, ptr, i64, i64, i32, i32, i32, ptr, %struct.sqlite3InitInfo, i32, ptr, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, %struct.anon, ptr, ptr, ptr, ptr, i32, %struct.Hash, ptr, ptr, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
-	%struct.sqlite3InitInfo = type { i32, i32, i8 }
-	%struct.sqlite3_context = type { ptr, ptr, %struct.Mem, ptr, i32, ptr }
-	%struct.sqlite3_file = type { ptr }
-	%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
-	%struct.sqlite3_index_constraint_usage = type { i32, i8 }
-	%struct.sqlite3_index_info = type { i32, ptr, i32, ptr, ptr, i32, ptr, i32, i32, double }
-	%struct.sqlite3_io_methods = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
-	%struct.sqlite3_module = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
-	%struct.sqlite3_mutex = type opaque
-	%struct.sqlite3_vfs = type { i32, i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
-	%struct.sqlite3_vtab = type { ptr, i32, ptr }
-	%struct.sqlite3_vtab_cursor = type { ptr }
+  %struct..4sPragmaType = type { ptr, i32 }
+  %struct.AggInfo = type { i8, i8, i32, ptr, i32, ptr, i32, i32, i32, ptr, i32, i32 }
+  %struct.AggInfo_col = type { ptr, i32, i32, i32, i32, ptr }
+  %struct.AggInfo_func = type { ptr, ptr, i32, i32 }
+  %struct.AuxData = type { ptr, ptr }
+  %struct.Bitvec = type { i32, i32, i32, { [125 x i32] } }
+  %struct.BtCursor = type { ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, %struct.CellInfo, i8, i8, ptr, i64, i32, i8, ptr }
+  %struct.BtLock = type { ptr, i32, i8, ptr }
+  %struct.BtShared = type { ptr, ptr, ptr, ptr, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, ptr, ptr, ptr, %struct.BusyHandler, i32, ptr, ptr, ptr }
+  %struct.Btree = type { ptr, ptr, i8, i8, i8, i32, ptr, ptr }
+  %struct.BtreeMutexArray = type { i32, [11 x ptr] }
+  %struct.BusyHandler = type { ptr, ptr, i32 }
+  %struct.CellInfo = type { ptr, i64, i32, i32, i16, i16, i16, i16 }
+  %struct.CollSeq = type { ptr, i8, i8, ptr, ptr, ptr }
+  %struct.Column = type { ptr, ptr, ptr, ptr, i8, i8, i8, i8 }
+  %struct.Context = type { i64, i32, %struct.Fifo }
+  %struct.CountCtx = type { i64 }
+  %struct.Cursor = type { ptr, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, ptr, i32, ptr, i64, ptr, ptr, i32, i64, ptr, ptr, i32, i32, ptr, ptr, ptr }
+  %struct.Db = type { ptr, ptr, i8, i8, ptr, ptr, ptr }
+  %struct.Expr = type { i8, i8, i16, ptr, ptr, ptr, ptr, %struct..4sPragmaType, %struct..4sPragmaType, i32, i32, ptr, i32, i32, ptr, ptr, i32 }
+  %struct.ExprList = type { i32, i32, i32, ptr }
+  %struct.ExprList_item = type { ptr, ptr, i8, i8, i8 }
+  %struct.FKey = type { ptr, ptr, ptr, ptr, i32, ptr, i8, i8, i8, i8 }
+  %struct.Fifo = type { i32, ptr, ptr }
+  %struct.FifoPage = type { i32, i32, i32, ptr, [1 x i64] }
+  %struct.FuncDef = type { i16, i8, i8, i8, ptr, ptr, ptr, ptr, ptr, [1 x i8] }
+  %struct.Hash = type { i8, i8, i32, i32, ptr, ptr }
+  %struct.HashElem = type { ptr, ptr, ptr, ptr, i32 }
+  %struct.IdList = type { ptr, i32, i32 }
+  %struct.Index = type { ptr, i32, ptr, ptr, ptr, i32, i8, i8, ptr, ptr, ptr, ptr, ptr }
+  %struct.KeyInfo = type { ptr, i8, i8, i8, i32, ptr, [1 x ptr] }
+  %struct.Mem = type { %struct.CountCtx, double, ptr, ptr, i32, i16, i8, i8, ptr }
+  %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], ptr, ptr, ptr, i32, ptr }
+  %struct.Module = type { ptr, ptr, ptr, ptr }
+  %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, { i32 } }
+  %struct.Pager = type { ptr, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, %struct.PagerLruList, ptr, ptr, ptr, i64, i64, i64, i64, i64, i32, ptr, ptr, i32, ptr, ptr, [16 x i8] }
+  %struct.PagerLruLink = type { ptr, ptr }
+  %struct.PagerLruList = type { ptr, ptr, ptr }
+  %struct.Parse = type { ptr, i32, ptr, ptr, i8, i8, i8, i8, i8, i8, i8, [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x i32], i32, ptr, i32, i32, i32, i32, i32, ptr, i8, %struct..4sPragmaType, %struct..4sPragmaType, %struct..4sPragmaType, ptr, ptr, ptr, ptr, ptr, ptr, %struct..4sPragmaType, i8, ptr, i32 }
+  %struct.PgHdr = type { ptr, i32, ptr, ptr, %struct.PagerLruLink, ptr, i8, i8, i8, i8, i8, i16, ptr, ptr, ptr }
+  %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, ptr, i8, i8, i16, i32, ptr }
+  %struct.Select = type { ptr, i8, i8, i8, i8, i8, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, [3 x i32] }
+  %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+  %struct.SrcList_item = type { ptr, ptr, ptr, ptr, ptr, i8, i8, i32, ptr, ptr, i64 }
+  %struct.Table = type { ptr, i32, ptr, i32, ptr, i32, ptr, i32, ptr, ptr, ptr, ptr, i32, i8, i8, i8, i8, i8, i8, i8, ptr, ptr, i32, ptr, ptr }
+  %struct.TableLock = type { i32, i32, i8, ptr }
+  %struct.Trigger = type { ptr, ptr, i8, i8, ptr, ptr, %struct..4sPragmaType, ptr, ptr, ptr, ptr }
+  %struct.TriggerStack = type { ptr, i32, i32, i32, i32, i32, i32, ptr, ptr }
+  %struct.TriggerStep = type { i32, i32, ptr, ptr, %struct..4sPragmaType, ptr, ptr, ptr, ptr, ptr }
+  %struct.Vdbe = type { ptr, ptr, ptr, i32, i32, ptr, i32, i32, ptr, ptr, ptr, i32, ptr, i32, ptr, ptr, i32, i32, i32, ptr, i32, i32, %struct.Fifo, i32, i32, ptr, i32, i32, i32, i32, i32, [25 x i32], i32, i32, ptr, ptr, ptr, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, ptr, i32 }
+  %struct.VdbeFunc = type { ptr, i32, [1 x %struct.AuxData] }
+  %struct._OvflCell = type { ptr, i16 }
+  %struct._ht = type { i32, ptr }
+  %struct.anon = type { double }
+  %struct.sColMap = type { i32, ptr }
+  %struct.sqlite3 = type { ptr, i32, ptr, i32, i32, i32, i32, i8, i8, i8, i8, i32, ptr, i64, i64, i32, i32, i32, ptr, %struct.sqlite3InitInfo, i32, ptr, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, %struct.anon, ptr, ptr, ptr, ptr, i32, %struct.Hash, ptr, ptr, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+  %struct.sqlite3InitInfo = type { i32, i32, i8 }
+  %struct.sqlite3_context = type { ptr, ptr, %struct.Mem, ptr, i32, ptr }
+  %struct.sqlite3_file = type { ptr }
+  %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+  %struct.sqlite3_index_constraint_usage = type { i32, i8 }
+  %struct.sqlite3_index_info = type { i32, ptr, i32, ptr, ptr, i32, ptr, i32, i32, double }
+  %struct.sqlite3_io_methods = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+  %struct.sqlite3_module = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+  %struct.sqlite3_mutex = type opaque
+  %struct.sqlite3_vfs = type { i32, i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+  %struct.sqlite3_vtab = type { ptr, i32, ptr }
+  %struct.sqlite3_vtab_cursor = type { ptr }
 
 define fastcc void @sqlite3Insert(ptr %pParse, ptr %pTabList, ptr %pList, ptr %pSelect, ptr %pColumn, i32 %onError) nounwind {
 entry:
-	br i1 false, label %bb54, label %bb69.loopexit
+  br i1 false, label %bb54, label %bb69.loopexit
 
 bb54:		; preds = %entry
-	br label %bb69.loopexit
+  br label %bb69.loopexit
 
 bb59:		; preds = %bb63.preheader
-	%0 = load ptr, ptr %3, align 4		; <ptr> [#uses=0]
-	br label %bb65
+  %0 = load ptr, ptr %3, align 4		; <ptr> [#uses=0]
+  br label %bb65
 
 bb65:		; preds = %bb63.preheader, %bb59
-	%1 = load ptr, ptr %4, align 4		; <ptr> [#uses=0]
-	br i1 false, label %bb67, label %bb63.preheader
+  %1 = load ptr, ptr %4, align 4		; <ptr> [#uses=0]
+  br i1 false, label %bb67, label %bb63.preheader
 
 bb67:		; preds = %bb65
-	%2 = getelementptr %struct.IdList, ptr %pColumn, i32 0, i32 0		; <ptr> [#uses=0]
-	unreachable
+  %2 = getelementptr %struct.IdList, ptr %pColumn, i32 0, i32 0		; <ptr> [#uses=0]
+  unreachable
 
 bb69.loopexit:		; preds = %bb54, %entry
-	%3 = getelementptr %struct.IdList, ptr %pColumn, i32 0, i32 0		; <ptr> [#uses=1]
-	%4 = getelementptr %struct.IdList, ptr %pColumn, i32 0, i32 0		; <ptr> [#uses=1]
-	br label %bb63.preheader
+  %3 = getelementptr %struct.IdList, ptr %pColumn, i32 0, i32 0		; <ptr> [#uses=1]
+  %4 = getelementptr %struct.IdList, ptr %pColumn, i32 0, i32 0		; <ptr> [#uses=1]
+  br label %bb63.preheader
 
 bb63.preheader:		; preds = %bb69.loopexit, %bb65
-	br i1 false, label %bb59, label %bb65
+  br i1 false, label %bb59, label %bb65
 }
diff --git a/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll b/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll
index 6aa79e0..83177e5 100644
--- a/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll
+++ b/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll
@@ -1,21 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -disable-output
 ; PR3775
 
 ; ModuleID = 'bugpoint-reduced-simplified.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%"struct.__gnu_cxx::hash<ptr>" = type <{ i8 }>
-	%struct.__sched_param = type { i32 }
-	%struct._pthread_descr_struct = type opaque
-	%struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, ptr, i32 }
-	%struct.pthread_mutex_t = type { i32, i32, ptr, i32, %llvm.dbg.anchor.type }
-	%"struct.std::_Rb_tree<ptr,std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<ptr>,std::allocator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >" = type { %"struct.std::_Rb_tree<ptr,std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<ptr>,std::allocator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >::_Rb_tree_impl<std::less<ptr>,false>" }
-	%"struct.std::_Rb_tree<ptr,std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<ptr>,std::allocator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >::_Rb_tree_impl<std::less<ptr>,false>" = type { %"struct.__gnu_cxx::hash<ptr>", %"struct.std::_Rb_tree_node_base", i32 }
-	%"struct.std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >" = type { ptr }
-	%"struct.std::_Rb_tree_node_base" = type { i32, ptr, ptr, ptr }
-	%"struct.std::pair<std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
-	%"struct.std::pair<ptr const,ptr>" = type { ptr, ptr }
+  %llvm.dbg.anchor.type = type { i32, i32 }
+  %"struct.__gnu_cxx::hash<ptr>" = type <{ i8 }>
+  %struct.__sched_param = type { i32 }
+  %struct._pthread_descr_struct = type opaque
+  %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, ptr, i32 }
+  %struct.pthread_mutex_t = type { i32, i32, ptr, i32, %llvm.dbg.anchor.type }
+  %"struct.std::_Rb_tree<ptr,std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<ptr>,std::allocator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >" = type { %"struct.std::_Rb_tree<ptr,std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<ptr>,std::allocator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >::_Rb_tree_impl<std::less<ptr>,false>" }
+  %"struct.std::_Rb_tree<ptr,std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<ptr>,std::allocator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >::_Rb_tree_impl<std::less<ptr>,false>" = type { %"struct.__gnu_cxx::hash<ptr>", %"struct.std::_Rb_tree_node_base", i32 }
+  %"struct.std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >" = type { ptr }
+  %"struct.std::_Rb_tree_node_base" = type { i32, ptr, ptr, ptr }
+  %"struct.std::pair<std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
+  %"struct.std::pair<ptr const,ptr>" = type { ptr, ptr }
 
 @_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (ptr, ptr), ptr @pthread_once		; <ptr> [#uses=0]
 @_ZL27__gthrw_pthread_getspecificj = weak alias ptr (i32), ptr @pthread_getspecific		; <ptr> [#uses=0]
@@ -36,75 +37,75 @@ declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone
 
 define fastcc void @_ZNSt8_Rb_treeIPvSt4pairIKS0_S0_ESt10_Select1stIS3_ESt4lessIS0_ESaIS3_EE16_M_insert_uniqueERKS3_(ptr noalias nocapture sret(%"struct.std::pair<std::_Rb_tree_iterator<std::pair<ptr const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>") %agg.result, ptr %this, ptr %__v) nounwind {
 entry:
-	br i1 false, label %bb7, label %bb
+  br i1 false, label %bb7, label %bb
 
 bb:		; preds = %bb, %entry
-	br i1 false, label %bb5, label %bb
+  br i1 false, label %bb5, label %bb
 
 bb5:		; preds = %bb
-	call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind
-	br i1 false, label %bb11, label %bb7
+  call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind
+  br i1 false, label %bb11, label %bb7
 
 bb7:		; preds = %bb5, %entry
-	br label %bb11
+  br label %bb11
 
 bb11:		; preds = %bb7, %bb5
-	call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind
-	unreachable
+  call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind
+  unreachable
 }
 
 define i32 @pthread_once(ptr, ptr) {
-       ret i32 0
+  ret i32 0
 }
 
 define ptr @pthread_getspecific(i32) {
-       ret ptr null
+  ret ptr null
 }
 
 define i32 @pthread_setspecific(i32, ptr) {
-        ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_create(ptr, ptr, ptr, ptr) {
-       ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_cancel(i32) {
-      ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutex_lock(ptr) {
-       ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutex_trylock(ptr) {
-       ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutex_unlock(ptr) {
-       ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutex_init(ptr, ptr) {
-        ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_key_create(ptr, ptr) {
-       ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_key_delete(i32) {
-        ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutexattr_init(ptr) {
-        ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutexattr_settype(ptr, i32) {
-        ret i32 0
+  ret i32 0
 }
 
 define i32 @pthread_mutexattr_destroy(ptr) {
-       ret i32 0
+  ret i32 0
 }
diff --git a/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll b/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll
index 24ad185..9694ae4 100644
--- a/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll
+++ b/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll
@@ -1,67 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn | llvm-dis
 ; PR4256
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%struct.cset = type { ptr, i8, i8, i32, ptr }
-	%struct.lmat = type { ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, ptr, ptr, ptr, ptr }
-	%struct.re_guts = type { ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, i32, i32, i32, i32, [1 x i8] }
+  %llvm.dbg.anchor.type = type { i32, i32 }
+  %struct.cset = type { ptr, i8, i8, i32, ptr }
+  %struct.lmat = type { ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, ptr, ptr, ptr, ptr }
+  %struct.re_guts = type { ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, i32, i32, i32, i32, [1 x i8] }
 
 define ptr @lbackref(ptr %m, ptr %start, ptr %stop, i32 %startst, i32 %stopst, i32 %lev, i32 %rec) nounwind {
 entry:
-	br label %bb63
+  br label %bb63
 
 bb:		; preds = %bb63
-	switch i32 0, label %bb62 [
-		i32 268435456, label %bb2
-		i32 805306368, label %bb9
-		i32 -1610612736, label %bb51
-	]
+  switch i32 0, label %bb62 [
+  i32 268435456, label %bb2
+  i32 805306368, label %bb9
+  i32 -1610612736, label %bb51
+  ]
 
 bb2:		; preds = %bb
-	br label %bb62
+  br label %bb62
 
 bb9:		; preds = %bb
-	%0 = load i8, ptr %sp.1, align 1		; <i8> [#uses=0]
-	br label %bb62
+  %0 = load i8, ptr %sp.1, align 1		; <i8> [#uses=0]
+  br label %bb62
 
 bb51:		; preds = %bb
-	%1 = load i8, ptr %sp.1, align 1		; <i8> [#uses=0]
-	ret ptr null
+  %1 = load i8, ptr %sp.1, align 1		; <i8> [#uses=0]
+  ret ptr null
 
 bb62:		; preds = %bb9, %bb2, %bb
-	br label %bb63
+  br label %bb63
 
 bb63:		; preds = %bb84, %bb69, %bb62, %entry
-	%sp.1 = phi ptr [ null, %bb62 ], [ %sp.1.lcssa, %bb84 ], [ %start, %entry ], [ %sp.1.lcssa, %bb69 ]		; <ptr> [#uses=3]
-	br i1 false, label %bb, label %bb65
+  %sp.1 = phi ptr [ null, %bb62 ], [ %sp.1.lcssa, %bb84 ], [ %start, %entry ], [ %sp.1.lcssa, %bb69 ]		; <ptr> [#uses=3]
+  br i1 false, label %bb, label %bb65
 
 bb65:		; preds = %bb63
-	%sp.1.lcssa = phi ptr [ %sp.1, %bb63 ]		; <ptr> [#uses=4]
-	br i1 false, label %bb66, label %bb69
+  %sp.1.lcssa = phi ptr [ %sp.1, %bb63 ]		; <ptr> [#uses=4]
+  br i1 false, label %bb66, label %bb69
 
 bb66:		; preds = %bb65
-	ret ptr null
+  ret ptr null
 
 bb69:		; preds = %bb65
-	switch i32 0, label %bb108.loopexit2.loopexit.loopexit [
-		i32 1342177280, label %bb63
-		i32 1476395008, label %bb84
-		i32 1879048192, label %bb104
-		i32 2013265920, label %bb93
-	]
+  switch i32 0, label %bb108.loopexit2.loopexit.loopexit [
+  i32 1342177280, label %bb63
+  i32 1476395008, label %bb84
+  i32 1879048192, label %bb104
+  i32 2013265920, label %bb93
+  ]
 
 bb84:		; preds = %bb69
-	%2 = tail call ptr @lbackref(ptr %m, ptr %sp.1.lcssa, ptr %stop, i32 0, i32 %stopst, i32 0, i32 0) nounwind		; <ptr> [#uses=0]
-	br label %bb63
+  %2 = tail call ptr @lbackref(ptr %m, ptr %sp.1.lcssa, ptr %stop, i32 0, i32 %stopst, i32 0, i32 0) nounwind		; <ptr> [#uses=0]
+  br label %bb63
 
 bb93:		; preds = %bb69
-	ret ptr null
+  ret ptr null
 
 bb104:		; preds = %bb69
-	%sp.1.lcssa.lcssa33 = phi ptr [ %sp.1.lcssa, %bb69 ]		; <ptr> [#uses=0]
-	unreachable
+  %sp.1.lcssa.lcssa33 = phi ptr [ %sp.1.lcssa, %bb69 ]		; <ptr> [#uses=0]
+  unreachable
 
 bb108.loopexit2.loopexit.loopexit:		; preds = %bb69
-	ret ptr null
+  ret ptr null
 }
diff --git a/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll b/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll
index 3eda7ca..c49f651 100644
--- a/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll
+++ b/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll
@@ -1,14 +1,19 @@
-; Test to make sure malloc's bitcast does not block detection of a store 
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; Test to make sure malloc's bitcast does not block detection of a store
 ; to aliased memory; GVN should not optimize away the load in this program.
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 define i64 @test() {
+; CHECK-LABEL: define i64 @test() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 mul (i64 ptrtoint (ptr getelementptr (i64, ptr null, i64 1) to i64), i64 4))
+; CHECK-NEXT:    store i8 42, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[Y:%.*]] = load i64, ptr [[TMP1]], align 4
+; CHECK-NEXT:    ret i64 [[Y]]
+;
   %1 = tail call ptr @malloc(i64 mul (i64 4, i64 ptrtoint (ptr getelementptr (i64, ptr null, i64 1) to i64))) ; <ptr> [#uses=2]
   store i8 42, ptr %1
   %Y = load i64, ptr %1                               ; <i64> [#uses=1]
   ret i64 %Y
-; CHECK: %Y = load i64, ptr %1
-; CHECK: ret i64 %Y
 }
 
 declare noalias ptr @malloc(i64)
diff --git a/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll b/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll
index 321f3cf..c6fc7b9 100644
--- a/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll
+++ b/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll
@@ -1,9 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 ; CHECK-NOT: load
 ; CHECK-NOT: phi
 
 define ptr @cat(ptr %s1, ...) nounwind {
+; CHECK-LABEL: define ptr @cat(
+; CHECK-SAME: ptr [[S1:%.*]], ...) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[BB:%.*]], label [[BB3:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb3:
+; CHECK-NEXT:    store ptr undef, ptr undef, align 4
+; CHECK-NEXT:    br i1 undef, label [[BB5:%.*]], label [[BB6:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb6:
+; CHECK-NEXT:    br label [[BB12:%.*]]
+; CHECK:       bb8:
+; CHECK-NEXT:    br i1 undef, label [[BB9:%.*]], label [[BB10:%.*]]
+; CHECK:       bb9:
+; CHECK-NEXT:    br label [[BB11:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    br label [[BB11]]
+; CHECK:       bb11:
+; CHECK-NEXT:    br label [[BB12]]
+; CHECK:       bb12:
+; CHECK-NEXT:    br i1 undef, label [[BB8:%.*]], label [[BB13:%.*]]
+; CHECK:       bb13:
+; CHECK-NEXT:    ret ptr undef
+;
 entry:
   br i1 undef, label %bb, label %bb3
 
diff --git a/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll b/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll
index 0d2d45a..0a121ff 100644
--- a/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll
+++ b/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn
 ; PR7052
 
@@ -12,7 +13,7 @@ entry:
 
 l117.i.i:                                         ; preds = %entry
   invoke fastcc void @foo()
-          to label %.noexc5 unwind label %landing_pad
+  to label %.noexc5 unwind label %landing_pad
 
 .noexc5:                                          ; preds = %l117.i.i
   unreachable
@@ -22,7 +23,7 @@ k121.i.i:                                         ; preds = %entry
 
 l129.i.i:                                         ; preds = %k121.i.i
   invoke fastcc void @foo()
-          to label %.noexc7 unwind label %landing_pad
+  to label %.noexc7 unwind label %landing_pad
 
 .noexc7:                                          ; preds = %l129.i.i
   unreachable
@@ -34,7 +35,7 @@ k133.i.i:                                         ; preds = %k121.i.i
 
 l147.i.i:                                         ; preds = %k133.i.i
   invoke fastcc void @foo()
-          to label %.noexc10 unwind label %landing_pad
+  to label %.noexc10 unwind label %landing_pad
 
 .noexc10:                                         ; preds = %l147.i.i
   unreachable
@@ -44,10 +45,10 @@ k151.i.i:                                         ; preds = %k133.i.i
 
 landing_pad:                                      ; preds = %l147.i.i, %l129.i.i, %l117.i.i
   %exn = landingpad {ptr, i32}
-            cleanup
+  cleanup
   switch i32 undef, label %fin [
-    i32 1, label %catch1
-    i32 2, label %catch
+  i32 1, label %catch1
+  i32 2, label %catch
   ]
 
 fin:                                              ; preds = %landing_pad
diff --git a/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll b/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll
index b06570b..3d12783 100644
--- a/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll
+++ b/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll
@@ -1,9 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 declare i32 @foo(i32) readnone
 
 define i1 @bar() {
-; CHECK-LABEL: @bar(
+; CHECK-LABEL: define i1 @bar() {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(i32 0) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @foo(i32 [[A]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 true
+;
   %a = call i32 @foo (i32 0) readnone
   %b = call i32 @foo (i32 0) readnone
   %c = and i32 %a, %b
@@ -11,5 +16,4 @@ define i1 @bar() {
   %y = call i32 @foo (i32 %c) readnone
   %z = icmp eq i32 %x, %y
   ret i1 %z
-; CHECK: ret i1 true
-} 
+}
diff --git a/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll b/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll
index 3e8a5d8..c039422 100644
--- a/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll
+++ b/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
@@ -64,10 +65,10 @@ doemit.exit76.i:
 
 "<bb 64>.i":
   switch i32 undef, label %"<bb 5>" [
-    i32 42, label %"<L54>.i"
-    i32 43, label %"<L55>.i"
-    i32 63, label %"<L56>.i"
-    i32 123, label %"<bb 5>.i258.i"
+  i32 42, label %"<L54>.i"
+  i32 43, label %"<L55>.i"
+  i32 63, label %"<L56>.i"
+  i32 123, label %"<bb 5>.i258.i"
   ]
 
 "<L54>.i":
@@ -93,14 +94,14 @@ doemit.exit127.i:
 
 "<bb 5>":
   switch i32 undef, label %"<L39>.i" [
-    i32 36, label %"<L19>.i"
-    i32 94, label %"<L18>.i"
-    i32 124, label %"<L98>.i"
-    i32 42, label %"<L99>.i"
-    i32 43, label %"<L99>.i"
-    i32 46, label %"<L24>.i"
-    i32 63, label %"<L99>.i"
-    i32 91, label %"<L28>.i"
-    i32 92, label %"<L29>.i"
+  i32 36, label %"<L19>.i"
+  i32 94, label %"<L18>.i"
+  i32 124, label %"<L98>.i"
+  i32 42, label %"<L99>.i"
+  i32 43, label %"<L99>.i"
+  i32 46, label %"<L24>.i"
+  i32 63, label %"<L99>.i"
+  i32 91, label %"<L28>.i"
+  i32 92, label %"<L29>.i"
   ]
 }
diff --git a/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll b/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll
index c547e8f..444385d 100644
--- a/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll
+++ b/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll
@@ -1,9 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 ;
 
 %0 = type { i64, i1 }
 
 define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test1(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    [[UADD_0:%.*]] = extractvalue [[TMP0]] [[TMP4]], 0
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[TMP1]], [[UADD_0]]
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
 entry:
   %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
   %uadd.0 = extractvalue %0 %uadd, 0
@@ -12,11 +25,20 @@ entry:
   ret i64 %add2
 }
 
-; CHECK-LABEL: @test1(
-; CHECK-NOT: add1
-; CHECK: ret
 
 define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test2(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    [[USUB_0:%.*]] = extractvalue [[TMP0]] [[TMP4]], 0
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[TMP1]], [[USUB_0]]
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
 entry:
   %usub = tail call %0 @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
   %usub.0 = extractvalue %0 %usub, 0
@@ -25,11 +47,20 @@ entry:
   ret i64 %add2
 }
 
-; CHECK-LABEL: @test2(
-; CHECK-NOT: sub1
-; CHECK: ret
 
 define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test3(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    [[UMUL_0:%.*]] = extractvalue [[TMP0]] [[TMP4]], 0
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[TMP1]], [[UMUL_0]]
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
 entry:
   %umul = tail call %0 @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
   %umul.0 = extractvalue %0 %umul, 0
@@ -38,11 +69,20 @@ entry:
   ret i64 %add2
 }
 
-; CHECK-LABEL: @test3(
-; CHECK-NOT: mul1
-; CHECK: ret
 
 define i64 @test4(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test4(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    [[SADD_0:%.*]] = extractvalue [[TMP0]] [[TMP4]], 0
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[TMP1]], [[SADD_0]]
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
 entry:
   %sadd = tail call %0 @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
   %sadd.0 = extractvalue %0 %sadd, 0
@@ -51,11 +91,20 @@ entry:
   ret i64 %add2
 }
 
-; CHECK-LABEL: @test4(
-; CHECK-NOT: add1
-; CHECK: ret
 
 define i64 @test5(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test5(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    [[SSUB_0:%.*]] = extractvalue [[TMP0]] [[TMP4]], 0
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[TMP1]], [[SSUB_0]]
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
 entry:
   %ssub = tail call %0 @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
   %ssub.0 = extractvalue %0 %ssub, 0
@@ -64,11 +113,20 @@ entry:
   ret i64 %add2
 }
 
-; CHECK-LABEL: @test5(
-; CHECK-NOT: sub1
-; CHECK: ret
 
 define i64 @test6(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test6(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    [[SMUL_0:%.*]] = extractvalue [[TMP0]] [[TMP4]], 0
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[TMP1]], [[SMUL_0]]
+; CHECK-NEXT:    ret i64 [[ADD2]]
+;
 entry:
   %smul = tail call %0 @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
   %smul.0 = extractvalue %0 %smul, 0
@@ -77,9 +135,6 @@ entry:
   ret i64 %add2
 }
 
-; CHECK-LABEL: @test6(
-; CHECK-NOT: mul1
-; CHECK: ret
 
 declare void @exit(i32) noreturn
 declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
diff --git a/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll b/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll
index 46e3c28..675e7da 100644
--- a/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll
+++ b/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 %struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo }
 %struct.__type_info_pseudo = type { ptr, ptr }
@@ -18,26 +19,70 @@ declare void @__cxa_end_catch()
 declare i32 @__gxx_personality_v0(i32, i64, ptr, ptr)
 
 define void @_Z3foov() uwtable personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: define void @_Z3foov(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    invoke void @_Z4barv()
+; CHECK-NEXT:            to label [[RETURN:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       lpad:
+; CHECK-NEXT:    [[TMP0:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            catch ptr @_ZTIi
+; CHECK-NEXT:            catch ptr @_ZTIb
+; CHECK-NEXT:            catch ptr @_ZTIi
+; CHECK-NEXT:            catch ptr @_ZTIb
+; CHECK-NEXT:    [[EXC_PTR2_I:%.*]] = extractvalue { ptr, i32 } [[TMP0]], 0
+; CHECK-NEXT:    [[FILTER3_I:%.*]] = extractvalue { ptr, i32 } [[TMP0]], 1
+; CHECK-NEXT:    [[TYPEID_I:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[FILTER3_I]], [[TYPEID_I]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[PPAD:%.*]], label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    [[TYPEID1_I:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIb)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[FILTER3_I]], [[TYPEID1_I]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PPAD2:%.*]], label [[NEXT2:%.*]]
+; CHECK:       ppad:
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR1]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       ppad2:
+; CHECK-NEXT:    [[D_2073_5_I:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR1]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR1]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       next2:
+; CHECK-NEXT:    call void @_Z7cleanupv()
+; CHECK-NEXT:    br i1 [[TMP1]], label [[PPAD3:%.*]], label [[NEXT3:%.*]]
+; CHECK:       next3:
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PPAD4:%.*]], label [[UNWIND:%.*]]
+; CHECK:       unwind:
+; CHECK-NEXT:    resume { ptr, i32 } [[TMP0]]
+; CHECK:       ppad3:
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR1]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR1]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       ppad4:
+; CHECK-NEXT:    [[D_2080_5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR1]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR1]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
 entry:
   invoke void @_Z4barv()
-          to label %return unwind label %lpad
+  to label %return unwind label %lpad
 
 lpad:                                             ; preds = %entry
   %0 = landingpad { ptr, i32 }
-          catch ptr @_ZTIi
-          catch ptr @_ZTIb
-          catch ptr @_ZTIi
-          catch ptr @_ZTIb
+  catch ptr @_ZTIi
+  catch ptr @_ZTIb
+  catch ptr @_ZTIi
+  catch ptr @_ZTIb
   %exc_ptr2.i = extractvalue { ptr, i32 } %0, 0
   %filter3.i = extractvalue { ptr, i32 } %0, 1
   %typeid.i = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi)
-; CHECK: call i32 @llvm.eh.typeid.for
   %1 = icmp eq i32 %filter3.i, %typeid.i
   br i1 %1, label %ppad, label %next
 
 next:                                             ; preds = %lpad
   %typeid1.i = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIb)
-; CHECK: call i32 @llvm.eh.typeid.for
   %2 = icmp eq i32 %filter3.i, %typeid1.i
   br i1 %2, label %ppad2, label %next2
 
@@ -54,7 +99,6 @@ ppad2:                                            ; preds = %next
 next2:                                            ; preds = %next
   call void @_Z7cleanupv()
   %typeid = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi)
-; CHECK-NOT: call i32 @llvm.eh.typeid.for
   %4 = icmp eq i32 %filter3.i, %typeid
   br i1 %4, label %ppad3, label %next3
 
diff --git a/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll b/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll
index 787a3ba..1357f2b 100644
--- a/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll
+++ b/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn
 ; PR12858
 
diff --git a/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail-inseltpoison.ll b/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail-inseltpoison.ll
index 2fb275d..7b3f33b 100644
--- a/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail-inseltpoison.ll
+++ b/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail-inseltpoison.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; XFAIL: *
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
@@ -14,6 +15,25 @@ declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x
 ; CHECK: llvm.masked.scatter
 ; CHECK: llvm.masked.gather
 define spir_kernel void @test(<2 x ptr> %in1, <2 x ptr> %in2, ptr %out) {
+; CHECK-LABEL: define spir_kernel void @test(
+; CHECK-SAME: <2 x ptr> [[IN1:%.*]], <2 x ptr> [[IN2:%.*]], ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP_1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP_I:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP_0]], i32 0
+; CHECK-NEXT:    [[TMP:%.*]] = insertelement <2 x ptr> [[TMP_I]], ptr [[TMP_1]], i32 1
+; CHECK-NEXT:    [[IN1_V:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[IN1]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+; CHECK-NEXT:    [[IN2_V:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[IN2]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[IN1_V]], <2 x ptr> [[TMP]], i32 1, <2 x i1> <i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[IN2_V]], <2 x ptr> [[TMP]], i32 1, <2 x i1> <i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP_V_1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+; CHECK-NEXT:    [[TMP_V_1_0:%.*]] = extractelement <2 x i32> [[TMP_V_1]], i32 0
+; CHECK-NEXT:    [[TMP_V_1_1:%.*]] = extractelement <2 x i32> [[TMP_V_1]], i32 1
+; CHECK-NEXT:    store i32 [[TMP_V_1_0]], ptr [[OUT]], align 4
+; CHECK-NEXT:    [[OUT_1:%.*]] = getelementptr i32, ptr [[OUT]], i32 1
+; CHECK-NEXT:    store i32 [[TMP_V_1_1]], ptr [[OUT_1]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   ; Just some temporary storage
   %tmp.0 = alloca i32
diff --git a/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll b/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll
index f2d1827..a3f1f4d 100644
--- a/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll
+++ b/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
@@ -7,14 +8,38 @@ target triple = "x86_64-apple-macosx10.7.0"
 ; Make sure we do not replace load %shouldExit in while.cond.backedge
 ; with a phi node where the value from while.body is 0.
 define i32 @test() nounwind ssp {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHOULDEXIT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TASKSIDLE:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, ptr [[SHOULDEXIT]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[TASKSIDLE]], align 4
+; CHECK-NEXT:    call void @CTestInitialize(ptr [[TASKSIDLE]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SHOULDEXIT]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.body.lr.ph:
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    call void @RunInMode(i32 100) #[[ATTR1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TASKSIDLE]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_COND_BACKEDGE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i32 0, ptr [[TASKSIDLE]], align 4
+; CHECK-NEXT:    call void @TimerCreate(ptr [[SHOULDEXIT]]) #[[ATTR1]]
+; CHECK-NEXT:    br label [[WHILE_COND_BACKEDGE]]
+; CHECK:       while.cond.backedge:
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SHOULDEXIT]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]]
+; CHECK:       while.cond.while.end_crit_edge:
+; CHECK-NEXT:    br label [[WHILE_END]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
-; CHECK: test()
-; CHECK: while.body:
-; CHECK: call void @RunInMode
-; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
-; CHECK: while.cond.backedge:
-; CHECK: load i32, ptr %shouldExit
-; CHECK: br i1 %cmp, label %while.body
   %shouldExit = alloca i32, align 4
   %tasksIdle = alloca i32, align 4
   store i32 0, ptr %shouldExit, align 4
diff --git a/llvm/test/Transforms/NewGVN/addrspacecast.ll b/llvm/test/Transforms/NewGVN/addrspacecast.ll
index fea8a2f..394db28 100644
--- a/llvm/test/Transforms/NewGVN/addrspacecast.ll
+++ b/llvm/test/Transforms/NewGVN/addrspacecast.ll
@@ -7,7 +7,7 @@ define ptr addrspace(1) @addrspacecast(ptr %ptr) {
 ; CHECK-NEXT:    [[Z1:%.*]] = addrspacecast ptr [[PTR:%.*]] to ptr addrspace(1)
 ; CHECK-NEXT:    br label [[BLOCK2:%.*]]
 ; CHECK:       block2:
-; CHECK-NEXT:    store ptr addrspace(1) [[Z1]], ptr undef
+; CHECK-NEXT:    store ptr addrspace(1) [[Z1]], ptr undef, align 8
 ; CHECK-NEXT:    ret ptr addrspace(1) [[Z1]]
 ;
 block1:
@@ -29,7 +29,7 @@ define ptr addrspace(1) @addrspacecast_different_result_types(ptr %ptr) {
 ; CHECK-NEXT:    br label [[BLOCK2:%.*]]
 ; CHECK:       block2:
 ; CHECK-NEXT:    [[Z2:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
-; CHECK-NEXT:    store ptr addrspace(2) [[Z1]], ptr undef
+; CHECK-NEXT:    store ptr addrspace(2) [[Z1]], ptr undef, align 8
 ; CHECK-NEXT:    ret ptr addrspace(1) [[Z2]]
 ;
 block1:
@@ -48,7 +48,7 @@ define ptr addrspace(1) @addrspacecast_simplify(ptr addrspace(1) %ptr) {
 ; CHECK-NEXT:    [[CAST0:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
 ; CHECK-NEXT:    br label [[BLOCK2:%.*]]
 ; CHECK:       block2:
-; CHECK-NEXT:    store ptr addrspace(1) [[PTR]], ptr undef
+; CHECK-NEXT:    store ptr addrspace(1) [[PTR]], ptr undef, align 8
 ; CHECK-NEXT:    ret ptr addrspace(1) [[PTR]]
 ;
 block1:
@@ -70,7 +70,7 @@ define ptr addrspace(1) @addrspacecast_constant() {
 ; CHECK-NEXT:    store ptr undef, ptr @h, align 4
 ; CHECK-NEXT:    br label [[BLOCK2:%.*]]
 ; CHECK:       block2:
-; CHECK-NEXT:    store ptr addrspace(1) undef, ptr undef
+; CHECK-NEXT:    store ptr addrspace(1) undef, ptr undef, align 8
 ; CHECK-NEXT:    ret ptr addrspace(1) undef
 ;
 block1:
@@ -88,11 +88,11 @@ block2:
 define ptr addrspace(1) @addrspacecast_leader(ptr %arg.ptr) {
 ; CHECK-LABEL: @addrspacecast_leader(
 ; CHECK-NEXT:  block1:
-; CHECK-NEXT:    [[LOAD0:%.*]] = load ptr, ptr [[ARG_PTR:%.*]]
+; CHECK-NEXT:    [[LOAD0:%.*]] = load ptr, ptr [[ARG_PTR:%.*]], align 8
 ; CHECK-NEXT:    [[Z1:%.*]] = addrspacecast ptr [[LOAD0]] to ptr addrspace(1)
 ; CHECK-NEXT:    br label [[BLOCK2:%.*]]
 ; CHECK:       block2:
-; CHECK-NEXT:    store ptr addrspace(1) [[Z1]], ptr undef
+; CHECK-NEXT:    store ptr addrspace(1) [[Z1]], ptr undef, align 8
 ; CHECK-NEXT:    ret ptr addrspace(1) [[Z1]]
 ;
 block1:
diff --git a/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll b/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll
index baef8b5..5319046 100644
--- a/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll
+++ b/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll
@@ -245,7 +245,7 @@ bb23:                                             ; preds = %bb4
 define i8 @irreducible_memoryphi(ptr noalias %arg, ptr noalias %arg2) {
 ; CHECK-LABEL: @irreducible_memoryphi(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store i8 0, ptr [[ARG:%.*]]
+; CHECK-NEXT:    store i8 0, ptr [[ARG:%.*]], align 1
 ; CHECK-NEXT:    br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB2]]
diff --git a/llvm/test/Transforms/NewGVN/basic-undef-test.ll b/llvm/test/Transforms/NewGVN/basic-undef-test.ll
index 5b731fc..148c022 100644
--- a/llvm/test/Transforms/NewGVN/basic-undef-test.ll
+++ b/llvm/test/Transforms/NewGVN/basic-undef-test.ll
@@ -1,15 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 ; ModuleID = 'test3.ll'
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 define i32 @main(ptr %foo)  {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: ptr [[FOO:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[FOO]], align 4
+; CHECK-NEXT:    store i32 5, ptr undef, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
 entry:
-; CHECK: load i32, ptr %foo, align 4
   %0 = load i32, ptr %foo, align 4
   store i32 5, ptr undef, align 4
-; CHECK-NOT: load i32, ptr %foo, align 4
   %1 = load i32, ptr %foo, align 4
-; CHECK: add i32 %0, %0
   %2 = add i32 %0, %1
   ret i32 %2
 }
diff --git a/llvm/test/Transforms/NewGVN/br-identical.ll b/llvm/test/Transforms/NewGVN/br-identical.ll
index c998385..23f43b0 100644
--- a/llvm/test/Transforms/NewGVN/br-identical.ll
+++ b/llvm/test/Transforms/NewGVN/br-identical.ll
@@ -1,8 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S -o - %s | FileCheck %s
 
 ; If a branch has two identical successors, we cannot declare either dead.
 
 define void @widget(i1 %p) {
+; CHECK-LABEL: define void @widget(
+; CHECK-SAME: i1 [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T2:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT:    [[T2]] = add i64 [[T1]], 1
+; CHECK-NEXT:    [[T3:%.*]] = icmp ult i64 0, [[T2]]
+; CHECK-NEXT:    br i1 [[T3]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[T4:%.*]] = call i64 @f()
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br i1 [[P]], label [[BB5:%.*]], label [[BB6:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    br i1 true, label [[BB7]], label [[BB7]]
+; CHECK:       bb6:
+; CHECK-NEXT:    br i1 true, label [[BB7]], label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br i1 [[P]], label [[BB2]], label [[BB8:%.*]]
+; CHECK:       bb8:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %bb2
 
@@ -17,7 +41,6 @@ bb3:
   br label %bb4
 
 bb4:
-  ; CHECK-NOT: phi {{.*}} undef
   %foo = phi i64 [ %t4, %bb3 ], [ 0, %bb2 ]
   br i1 %p, label %bb5, label %bb6
 
diff --git a/llvm/test/Transforms/NewGVN/calloc-load-removal.ll b/llvm/test/Transforms/NewGVN/calloc-load-removal.ll
index a8a1e66..608f739 100644
--- a/llvm/test/Transforms/NewGVN/calloc-load-removal.ll
+++ b/llvm/test/Transforms/NewGVN/calloc-load-removal.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S -passes=newgvn < %s | FileCheck %s
 ; Check that loads from calloc are recognized as being zero.
 
@@ -5,14 +6,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Function Attrs: nounwind uwtable
 define i32 @test1() {
+; CHECK-LABEL: define i32 @test1() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT:    ret i32 0
+;
   %1 = tail call noalias ptr @calloc(i64 1, i64 4)
   ; This load is trivially constant zero
   %2 = load i32, ptr %1, align 4
   ret i32 %2
 
-; CHECK-LABEL: @test1(
-; CHECK-NOT: %2 = load i32, ptr %1, align 4
-; CHECK: ret i32 0
 }
 
 declare noalias ptr @calloc(i64, i64) mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
diff --git a/llvm/test/Transforms/NewGVN/calls-readonly.ll b/llvm/test/Transforms/NewGVN/calls-readonly.ll
index 68d74c1..49f5d3a 100644
--- a/llvm/test/Transforms/NewGVN/calls-readonly.ll
+++ b/llvm/test/Transforms/NewGVN/calls-readonly.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 ; Should delete the second call to strlen even though the intervening strchr call exists.
 
@@ -5,6 +6,22 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin7"
 
 define ptr @test(ptr %P, ptr %Q, i32 %x, i32 %y) nounwind readonly {
+; CHECK-LABEL: define ptr @test(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @strlen(ptr [[P]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB:%.*]], label [[BB1:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi i32 [ [[TMP2]], [[BB]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @strchr(ptr [[Q]], i32 97)
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[X_ADDR_0]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i32 [[X_ADDR_0]]
+; CHECK-NEXT:    ret ptr [[TMP5]]
+;
 entry:
   %0 = tail call i32 @strlen(ptr %P)              ; <i32> [#uses=2]
   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
@@ -24,21 +41,6 @@ bb1:                                              ; preds = %bb, %entry
   ret ptr %6
 }
 
-; CHECK: define ptr @test(ptr %P, ptr %Q, i32 %x, i32 %y) #0 {
-; CHECK: entry:
-; CHECK-NEXT:   %0 = tail call i32 @strlen(ptr %P)
-; CHECK-NEXT:   %1 = icmp eq i32 %0, 0
-; CHECK-NEXT:   br i1 %1, label %bb, label %bb1
-; CHECK: bb:
-; CHECK-NEXT:   %2 = sdiv i32 %x, %y
-; CHECK-NEXT:   br label %bb1
-; CHECK: bb1:
-; CHECK-NEXT:   %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ]
-; CHECK-NEXT:   %3 = tail call ptr @strchr(ptr %Q, i32 97)
-; CHECK-NEXT:   %4 = add i32 %x_addr.0, %0
-; CHECK-NEXT:   %5 = getelementptr i8, ptr %3, i32 %x_addr.0
-; CHECK-NEXT:   ret ptr %5
-; CHECK: }
 
 declare i32 @strlen(ptr) nounwind readonly
 
diff --git a/llvm/test/Transforms/NewGVN/completeness.ll b/llvm/test/Transforms/NewGVN/completeness.ll
index d968c78..4841e2e 100644
--- a/llvm/test/Transforms/NewGVN/completeness.ll
+++ b/llvm/test/Transforms/NewGVN/completeness.ll
@@ -6,9 +6,12 @@ define i32 @test1(i32, ptr) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK:         br label [[TMP6:%.*]]
-; CHECK:         br label [[TMP6]]
-; CHECK:         [[PHIOFOPS:%.*]] = phi i32 [ 105, [[TMP5]] ], [ 75, [[TMP4]] ]
+; CHECK:       4:
+; CHECK-NEXT:    br label [[TMP6:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 105, [[TMP5]] ], [ 75, [[TMP4]] ]
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
 ; CHECK-NEXT:    ret i32 [[PHIOFOPS]]
 ;
@@ -31,9 +34,12 @@ define i32 @test1b(i32, ptr) {
 ; CHECK-LABEL: @test1b(
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK:         br label [[TMP6:%.*]]
-; CHECK:         br label [[TMP6]]
-; CHECK:         [[PHIOFOPS1:%.*]] = phi i32 [ 105, [[TMP5]] ], [ 75, [[TMP4]] ]
+; CHECK:       4:
+; CHECK-NEXT:    br label [[TMP6:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[PHIOFOPS1:%.*]] = phi i32 [ 105, [[TMP5]] ], [ 75, [[TMP4]] ]
 ; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 1575, [[TMP5]] ], [ 1125, [[TMP4]] ]
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
 ; CHECK-NEXT:    ret i32 [[PHIOFOPS]]
@@ -58,9 +64,12 @@ define i32 @test2(i32) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; CHECK:         br label [[TMP5:%.*]]
-; CHECK:         br label [[TMP5]]
-; CHECK:         [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ]
+; CHECK:       3:
+; CHECK-NEXT:    br label [[TMP5:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    br label [[TMP5]]
+; CHECK:       5:
+; CHECK-NEXT:    [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ]
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 2, [[TMP3]] ], [ 3, [[TMP4]] ]
 ; CHECK-NEXT:    ret i32 5
 ;
@@ -158,9 +167,12 @@ define i32 @test4(i32, ptr, ptr noalias, ptr noalias) {
 ; CHECK-NEXT:    store i32 7, ptr [[TMP3:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:         br label [[TMP8:%.*]]
-; CHECK:         br label [[TMP8]]
-; CHECK:         [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ]
+; CHECK:       6:
+; CHECK-NEXT:    br label [[TMP8:%.*]]
+; CHECK:       7:
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ]
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi ptr [ [[TMP2]], [[TMP6]] ], [ [[TMP3]], [[TMP7]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOT0]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP9]], 15
@@ -287,19 +299,19 @@ bb28:                                             ; preds = %bb27, %bb
 define i8 @test6(ptr %addr) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:  entry-block:
-; CHECK-NEXT:    br label %main-loop
+; CHECK-NEXT:    br label [[MAIN_LOOP:%.*]]
 ; CHECK:       main-loop:
-; CHECK-NEXT:    [[PHIOFOPS1:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ]
-; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ]
-; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ]
-; CHECK-NEXT:    store volatile i8 0, ptr [[ADDR:%.*]]
-; CHECK-NEXT:    br i1 [[PHIOFOPS1]], label %busy-wait-phi-0, label [[EXIT:%.*]]
+; CHECK-NEXT:    [[PHIOFOPS1:%.*]] = phi i1 [ true, [[ENTRY_BLOCK:%.*]] ], [ false, [[CORE:%.*]] ]
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ false, [[ENTRY_BLOCK]] ], [ true, [[CORE]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 0, [[ENTRY_BLOCK]] ], [ 1, [[CORE]] ]
+; CHECK-NEXT:    store volatile i8 0, ptr [[ADDR:%.*]], align 1
+; CHECK-NEXT:    br i1 [[PHIOFOPS1]], label [[BUSY_WAIT_PHI_0:%.*]], label [[EXIT:%.*]]
 ; CHECK:       busy-wait-phi-0:
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr [[ADDR]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr [[ADDR]], align 1
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0
-; CHECK-NEXT:    br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]]
+; CHECK-NEXT:    br i1 [[ICMP]], label [[BUSY_WAIT_PHI_0]], label [[CORE]]
 ; CHECK:       core:
-; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[TRAP:%.*]], label %main-loop
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[TRAP:%.*]], label [[MAIN_LOOP]]
 ; CHECK:       trap:
 ; CHECK-NEXT:    ret i8 1
 ; CHECK:       exit:
@@ -507,13 +519,13 @@ declare ptr @wombat()
 define void @test12(ptr %p) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr %p
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[BB2:%.*]], label [[BB8:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    br i1 true, label [[BB6:%.*]], label [[BB7]]
+; CHECK-NEXT:    br i1 true, label [[BB6:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb6:
 ; CHECK-NEXT:    br label [[BB7]]
 ; CHECK:       bb7:
@@ -551,7 +563,7 @@ define void @test13() {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP:%.*]] = load i8, ptr null
+; CHECK-NEXT:    [[TMP:%.*]] = load i8, ptr null, align 1
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i8 [ [[TMP]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3]] ]
@@ -560,7 +572,7 @@ define void @test13() {
 ; CHECK-NEXT:    [[TMP6]] = getelementptr i8, ptr [[TMP4]], i64 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext i8 [[PHIOFOPS]] to i32
 ; CHECK-NEXT:    [[TMP9]] = mul i32 [[TMP5]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10]] = load i8, ptr [[TMP6]]
+; CHECK-NEXT:    [[TMP10]] = load i8, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[BB12:%.*]], label [[BB3]]
 ; CHECK:       bb12:
diff --git a/llvm/test/Transforms/NewGVN/cond_br.ll b/llvm/test/Transforms/NewGVN/cond_br.ll
index 3dbeb39..930e5b3 100644
--- a/llvm/test/Transforms/NewGVN/cond_br.ll
+++ b/llvm/test/Transforms/NewGVN/cond_br.ll
@@ -1,12 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 @y = external global i32
 @z = external global i32
 
 ; Function Attrs: nounwind ssp uwtable
 define void @foo(i32 %x) {
-; CHECK: @foo(i32 %x)
-; CHECK: %.pre = load i32, ptr @y
-; CHECK: call void @bar(i32 %.pre)
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr @y, align 4
+; CHECK-NEXT:    br i1 false, label [[IF_THEN:%.*]], label [[ENTRY_IF_END_CRIT_EDGE:%.*]]
+; CHECK:       entry.if.end_crit_edge:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    tail call void @bar(i32 [[DOTPRE]])
+; CHECK-NEXT:    ret void
+;
 
   %t = sub i32 %x, %x
   %.pre = load i32, ptr @y, align 4
@@ -28,9 +39,21 @@ if.end:                                           ; preds = %entry.if.end_crit_e
 }
 
 define void @foo2(i32 %x) {
-; CHECK: @foo2(i32 %x)
-; CHECK: %.pre = load i32, ptr @y
-; CHECK: tail call void @bar(i32 %.pre)
+; CHECK-LABEL: define void @foo2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr @y, align 4
+; CHECK-NEXT:    br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    store i32 1, ptr @z, align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    tail call void @bar(i32 [[DOTPRE]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %t = sub i32 %x, %x
   %.pre = load i32, ptr @y, align 4
diff --git a/llvm/test/Transforms/NewGVN/condprop.ll b/llvm/test/Transforms/NewGVN/condprop.ll
index e685dfe..d97fd38 100644
--- a/llvm/test/Transforms/NewGVN/condprop.ll
+++ b/llvm/test/Transforms/NewGVN/condprop.ll
@@ -134,11 +134,11 @@ define void @test4(i1 %b, i32 %x) {
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]]
 ; CHECK:       sw:
 ; CHECK-NEXT:    switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[CASE0:%.*]]
-; CHECK-NEXT:    i32 1, label [[CASE1:%.*]]
-; CHECK-NEXT:    i32 2, label [[CASE0]]
-; CHECK-NEXT:    i32 3, label [[CASE3]]
-; CHECK-NEXT:    i32 4, label [[DEFAULT]]
+; CHECK-NEXT:      i32 0, label [[CASE0:%.*]]
+; CHECK-NEXT:      i32 1, label [[CASE1:%.*]]
+; CHECK-NEXT:      i32 2, label [[CASE0]]
+; CHECK-NEXT:      i32 3, label [[CASE3]]
+; CHECK-NEXT:      i32 4, label [[DEFAULT]]
 ; CHECK-NEXT:    ]
 ; CHECK:       default:
 ; CHECK-NEXT:    call void @bar(i32 [[X]])
diff --git a/llvm/test/Transforms/NewGVN/crash-no-aa.ll b/llvm/test/Transforms/NewGVN/crash-no-aa.ll
index 55e2bcb..30f2e37 100644
--- a/llvm/test/Transforms/NewGVN/crash-no-aa.ll
+++ b/llvm/test/Transforms/NewGVN/crash-no-aa.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -disable-basic-aa -passes=newgvn -S < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/llvm/test/Transforms/NewGVN/crash-usecounts.ll b/llvm/test/Transforms/NewGVN/crash-usecounts.ll
index 5527bea..5cae740 100644
--- a/llvm/test/Transforms/NewGVN/crash-usecounts.ll
+++ b/llvm/test/Transforms/NewGVN/crash-usecounts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -disable-output < %s
 
 define void @test(i1 %arg, i1 %arg1) {
diff --git a/llvm/test/Transforms/NewGVN/crash.ll b/llvm/test/Transforms/NewGVN/crash.ll
index c886bd3..26eaa76 100644
--- a/llvm/test/Transforms/NewGVN/crash.ll
+++ b/llvm/test/Transforms/NewGVN/crash.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -disable-output < %s
 
 ; PR5631
@@ -106,7 +107,7 @@ if.then21.i:
   ret ptr undef
 
 do.body36.i:
-  %ivar38.i = load i64, ptr @g 
+  %ivar38.i = load i64, ptr @g
   %add.ptr39.sum.i = add i64 %ivar38.i, 8
   %tmp40.i = getelementptr inbounds i8, ptr %tmp18.i, i64 %add.ptr39.sum.i
   %tmp41.i = load i64, ptr %tmp40.i
@@ -132,14 +133,14 @@ declare i32 @foo2()
 define i32 @test4() {
 entry:
   ret i32 0
-  
+
 dead:
   %P2 = getelementptr i32, ptr %P2, i32 52
   %Q2 = getelementptr i32, ptr %Q2, i32 52
   store i32 4, ptr %P2
   %A = load i32, ptr %Q2
   br i1 true, label %dead, label %dead2
-  
+
 dead2:
   ret i32 %A
 }
diff --git a/llvm/test/Transforms/NewGVN/cyclic-phi-handling.ll b/llvm/test/Transforms/NewGVN/cyclic-phi-handling.ll
index 4a2f0b9..dc15079 100644
--- a/llvm/test/Transforms/NewGVN/cyclic-phi-handling.ll
+++ b/llvm/test/Transforms/NewGVN/cyclic-phi-handling.ll
@@ -5,15 +5,15 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define void @foo(i32 %arg, i32 %arg1, ptr %arg2) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    br label %bb3
+; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ %arg1, %bb ], [ [[TMP:%.*]]4, %bb7 ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ %arg, %bb ], [ [[TMP]], %bb7 ]
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 %arg2(i32 [[TMP4]], i32 [[TMP]])
+; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ [[ARG1:%.*]], [[BB:%.*]] ], [ [[TMP4:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT:    [[TMP4]] = phi i32 [ [[ARG:%.*]], [[BB]] ], [ [[TMP]], [[BB7]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 [[ARG2:%.*]](i32 [[TMP4]], i32 [[TMP]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
-; CHECK-NEXT:    br i1 [[TMP6]], label %bb7, label %bb8
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB7]], label [[BB8:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    br label %bb3
+; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb8:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll b/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll
index 01d95ae..cd2eca0 100644
--- a/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll
+++ b/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 ; Check that the redundant load from %if.then is removed.
@@ -6,15 +7,22 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; CHECK: @test_redundant_load(
-; CHECK-LABEL: entry:
-; CHECK-NEXT: load i32, ptr %Y, align 4, !dbg ![[LOC:[0-9]+]]
-; CHECK-LABEL: if.then:
-; CHECK-NOT: load
-; CHECK-LABEL: if.end:
-; CHECK: ![[LOC]] = !DILocation(line: 3, scope: !{{.*}})
 
 define i32 @test_redundant_load(i32 %X, ptr %Y) !dbg !6 {
+; CHECK-LABEL: define i32 @test_redundant_load(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[Y:%.*]]) !dbg [[DBG6:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X]], -1, !dbg [[DBG9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !dbg [[DBG9]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP0]], !dbg [[DBG10:![0-9]+]]
+; CHECK-NEXT:    call void @foo(), !dbg [[DBG11:![0-9]+]]
+; CHECK-NEXT:    br label [[IF_END]], !dbg [[DBG12:![0-9]+]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[RESULT_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[RESULT_0]], !dbg [[DBG13:![0-9]+]]
+;
 entry:
   %0 = load i32, ptr %Y, align 4, !dbg !8
   %cmp = icmp sgt i32 %X, -1, !dbg !9
@@ -50,3 +58,16 @@ declare void @foo()
 !11 = !DILocation(line: 7, scope: !6)
 !12 = !DILocation(line: 8, scope: !6)
 !13 = !DILocation(line: 10, scope: !6)
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: [[META1:![0-9]+]], isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: [[META2:![0-9]+]])
+; CHECK: [[META1]] = !DIFile(filename: "test.cpp", directory: "")
+; CHECK: [[META2]] = !{}
+; CHECK: [[DBG6]] = distinct !DISubprogram(name: "test_redundant_load", scope: [[META1]], file: [[META1]], line: 2, type: [[META7:![0-9]+]], scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META7]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG8]] = !DILocation(line: 3, scope: [[DBG6]])
+; CHECK: [[DBG9]] = !DILocation(line: 5, scope: [[DBG6]])
+; CHECK: [[DBG10]] = !DILocation(line: 6, scope: [[DBG6]])
+; CHECK: [[DBG11]] = !DILocation(line: 7, scope: [[DBG6]])
+; CHECK: [[DBG12]] = !DILocation(line: 8, scope: [[DBG6]])
+; CHECK: [[DBG13]] = !DILocation(line: 10, scope: [[DBG6]])
+;.
diff --git a/llvm/test/Transforms/NewGVN/edge.ll b/llvm/test/Transforms/NewGVN/edge.ll
index 8699c85..143e52c 100644
--- a/llvm/test/Transforms/NewGVN/edge.ll
+++ b/llvm/test/Transforms/NewGVN/edge.ll
@@ -1,7 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 define i32 @f1(i32 %x) {
-  ; CHECK-LABEL: define i32 @f1(
+; CHECK-LABEL: define i32 @f1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 [[X]]
+;
 bb0:
   %cmp = icmp eq i32 %x, 0
   br i1 %cmp, label %bb2, label %bb1
@@ -11,12 +21,19 @@ bb2:
   %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
   %foo = add i32 %cond, %x
   ret i32 %foo
-  ; CHECK: bb2:
-  ; CHECK: ret i32 %x
 }
 
 define i32 @f2(i32 %x) {
-  ; CHECK-LABEL: define i32 @f2(
+; CHECK-LABEL: define i32 @f2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 [[X]]
+;
 bb0:
   %cmp = icmp ne i32 %x, 0
   br i1 %cmp, label %bb1, label %bb2
@@ -26,12 +43,20 @@ bb2:
   %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
   %foo = add i32 %cond, %x
   ret i32 %foo
-  ; CHECK: bb2:
-  ; CHECK: ret i32 %x
 }
 
 define i32 @f3(i32 %x) {
-  ; CHECK-LABEL: define i32 @f3(
+; CHECK-LABEL: define i32 @f3(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    switch i32 [[X]], label [[BB1:%.*]] [
+; CHECK-NEXT:      i32 0, label [[BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 [[X]]
+;
 bb0:
   switch i32 %x, label %bb1 [ i32 0, label %bb2]
 bb1:
@@ -40,13 +65,21 @@ bb2:
   %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
   %foo = add i32 %cond, %x
   ret i32 %foo
-  ; CHECK: bb2:
-  ; CHECK: ret i32 %x
 }
 
 declare void @g(i1)
 define void @f4(ptr %x)  {
 ; CHECK-LABEL: define void @f4(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[Y:%.*]] = icmp eq ptr null, [[X]]
+; CHECK-NEXT:    br i1 [[Y]], label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @g(i1 [[Y]])
+; CHECK-NEXT:    ret void
+;
 bb0:
   %y = icmp eq ptr null, %x
   br i1 %y, label %bb2, label %bb1
@@ -55,11 +88,22 @@ bb1:
 bb2:
   %zed = icmp eq ptr null, %x
   call void @g(i1 %zed)
-; CHECK: call void @g(i1 %y)
   ret void
 }
 
 define double @fcmp_oeq_not_zero(double %x, double %y) {
+; CHECK-LABEL: define double @fcmp_oeq_not_zero(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double [[Y]], 2.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[X]], 2.000000e+00
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[RETVAL]]
+;
 entry:
   %cmp = fcmp oeq double %y, 2.0
   br i1 %cmp, label %if, label %return
@@ -72,11 +116,21 @@ return:
   %retval = phi double [ %div, %if ], [ %x, %entry ]
   ret double %retval
 
-; CHECK-LABEL: define double @fcmp_oeq_not_zero(
-; CHECK: %div = fdiv double %x, 2.0
 }
 
 define double @fcmp_une_not_zero(double %x, double %y) {
+; CHECK-LABEL: define double @fcmp_une_not_zero(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[Y]], 2.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[X]], 2.000000e+00
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[RETVAL]]
+;
 entry:
   %cmp = fcmp une double %y, 2.0
   br i1 %cmp, label %return, label %else
@@ -89,14 +143,24 @@ return:
   %retval = phi double [ %div, %else ], [ %x, %entry ]
   ret double %retval
 
-; CHECK-LABEL: define double @fcmp_une_not_zero(
-; CHECK: %div = fdiv double %x, 2.0
 }
 
-; PR22376 - We can't propagate zero constants because -0.0 
+; PR22376 - We can't propagate zero constants because -0.0
 ; compares equal to 0.0. If %y is -0.0 in this test case,
 ; we would produce the wrong sign on the infinity return value.
 define double @fcmp_oeq_zero(double %x, double %y) {
+; CHECK-LABEL: define double @fcmp_oeq_zero(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double [[Y]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[X]], [[Y]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[RETVAL]]
+;
 entry:
   %cmp = fcmp oeq double %y, 0.0
   br i1 %cmp, label %if, label %return
@@ -109,11 +173,21 @@ return:
   %retval = phi double [ %div, %if ], [ %x, %entry ]
   ret double %retval
 
-; CHECK-LABEL: define double @fcmp_oeq_zero(
-; CHECK: %div = fdiv double %x, %y
 }
 
 define double @fcmp_une_zero(double %x, double %y) {
+; CHECK-LABEL: define double @fcmp_une_zero(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[Y]], -0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[X]], [[Y]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[RETVAL]]
+;
 entry:
   %cmp = fcmp une double %y, -0.0
   br i1 %cmp, label %return, label %else
@@ -126,45 +200,65 @@ return:
   %retval = phi double [ %div, %else ], [ %x, %entry ]
   ret double %retval
 
-; CHECK-LABEL: define double @fcmp_une_zero(
-; CHECK: %div = fdiv double %x, %y
 }
 
 ; We also cannot propagate a value if it's not a constant.
 ; This is because the value could be 0.0 or -0.0.
 
 define double @fcmp_oeq_maybe_zero(double %x, double %y, double %z1, double %z2) {
+; CHECK-LABEL: define double @fcmp_oeq_maybe_zero(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]], double [[Z1:%.*]], double [[Z2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[Z:%.*]] = fadd double [[Z1]], [[Z2]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double [[Y]], [[Z]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[X]], [[Z]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[RETVAL]]
+;
 entry:
- %z = fadd double %z1, %z2
- %cmp = fcmp oeq double %y, %z
- br i1 %cmp, label %if, label %return
+  %z = fadd double %z1, %z2
+  %cmp = fcmp oeq double %y, %z
+  br i1 %cmp, label %if, label %return
 
 if:
- %div = fdiv double %x, %z
- br label %return
+  %div = fdiv double %x, %z
+  br label %return
 
 return:
- %retval = phi double [ %div, %if ], [ %x, %entry ]
- ret double %retval
+  %retval = phi double [ %div, %if ], [ %x, %entry ]
+  ret double %retval
 
-; CHECK-LABEL: define double @fcmp_oeq_maybe_zero(
-; CHECK: %div = fdiv double %x, %z
 }
 
 define double @fcmp_une_maybe_zero(double %x, double %y, double %z1, double %z2) {
+; CHECK-LABEL: define double @fcmp_une_maybe_zero(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]], double [[Z1:%.*]], double [[Z2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[Z:%.*]] = fadd double [[Z1]], [[Z2]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[Y]], [[Z]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[X]], [[Z]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret double [[RETVAL]]
+;
 entry:
- %z = fadd double %z1, %z2
- %cmp = fcmp une double %y, %z
- br i1 %cmp, label %return, label %else
+  %z = fadd double %z1, %z2
+  %cmp = fcmp une double %y, %z
+  br i1 %cmp, label %return, label %else
 
 else:
- %div = fdiv double %x, %z
- br label %return
+  %div = fdiv double %x, %z
+  br label %return
 
 return:
- %retval = phi double [ %div, %else ], [ %x, %entry ]
- ret double %retval
+  %retval = phi double [ %div, %else ], [ %x, %entry ]
+  ret double %retval
 
-; CHECK-LABEL: define double @fcmp_une_maybe_zero(
-; CHECK: %div = fdiv double %x, %z
 }
diff --git a/llvm/test/Transforms/NewGVN/eliminate-callsite-inline.ll b/llvm/test/Transforms/NewGVN/eliminate-callsite-inline.ll
index 748485c..6cf5438 100644
--- a/llvm/test/Transforms/NewGVN/eliminate-callsite-inline.ll
+++ b/llvm/test/Transforms/NewGVN/eliminate-callsite-inline.ll
@@ -1,15 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=inline,newgvn -S < %s | FileCheck %s
 
-; CHECK-LABEL: @f2()
-; CHECK-NEXT:    ret void
 
 define void @f2() {
+; CHECK-LABEL: define void @f2() {
+; CHECK-NEXT:    ret void
+;
   call void @f1()
   call void @f1()
   ret void
 }
 
 define internal void @f1() #1 {
+; CHECK-LABEL: define internal void @f1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
 entry:
   ret void
 }
diff --git a/llvm/test/Transforms/NewGVN/equivalent-phi.ll b/llvm/test/Transforms/NewGVN/equivalent-phi.ll
index 925795d..ba4fc14 100644
--- a/llvm/test/Transforms/NewGVN/equivalent-phi.ll
+++ b/llvm/test/Transforms/NewGVN/equivalent-phi.ll
@@ -11,22 +11,22 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define i32 @bar(i32 %arg, i32 %arg1, i32 %arg2) #0 {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    br label %bb3
+; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ %arg, %bb ], [ [[TMP:%.*]]15, %bb17 ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ %arg2, %bb ], [ [[TMP18:%.*]], %bb17 ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ 0, %bb ], [ [[TMP14:%.*]], %bb17 ]
+; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ [[ARG:%.*]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB17:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[ARG2:%.*]], [[BB]] ], [ [[TMP18:%.*]], [[BB17]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP14:%.*]], [[BB17]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP]] to i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr @global, i64 0, i64 [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP6]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP14]] = add nsw i32 [[TMP10]], [[TMP9]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add nsw i32 [[TMP]], %arg1
-; CHECK-NEXT:    br label %bb17
+; CHECK-NEXT:    [[TMP15]] = add nsw i32 [[TMP]], [[ARG1:%.*]]
+; CHECK-NEXT:    br label [[BB17]]
 ; CHECK:       bb17:
 ; CHECK-NEXT:    [[TMP18]] = add i32 [[TMP4]], -1
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP19]], label %bb3, label %bb20
+; CHECK-NEXT:    br i1 [[TMP19]], label [[BB3]], label [[BB20:%.*]]
 ; CHECK:       bb20:
 ; CHECK-NEXT:    ret i32 [[TMP14]]
 ;
diff --git a/llvm/test/Transforms/NewGVN/fold-const-expr.ll b/llvm/test/Transforms/NewGVN/fold-const-expr.ll
index 2821791..54020b88d 100644
--- a/llvm/test/Transforms/NewGVN/fold-const-expr.ll
+++ b/llvm/test/Transforms/NewGVN/fold-const-expr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; GVN failed to do constant expression folding and expanded
 ; them unfolded in many places, producing exponentially large const
 ; expressions. As a result, the compilation never fisished.
@@ -6,6 +7,16 @@
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 %2 = type { i32, i32, i32, i32, i32 }
 define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
+; CHECK-LABEL: define i32 @_Z16vector3util_mainv(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [[TMP0]], ptr [[TMP1]], i64 0, i32 1
+; CHECK-NEXT:    store <4 x i32> <i32 234567891, i32 345678912, i32 456789123, i32 0>, ptr [[TMP114]], align 4
+; CHECK-NEXT:    store i32 310393545, ptr [[TMP114]], align 4
+; CHECK-NEXT:    store i32 -383584258, ptr [[TMP114]], align 4
+; CHECK-NEXT:    store i32 -57163022, ptr [[TMP114]], align 4
+; CHECK-NEXT:    ret i32 0
+;
   %tmp1 = alloca %2, align 4
   %tmp114 = getelementptr inbounds %2, ptr %tmp1, i64 0, i32 1
   store <4 x i32> <i32 234567891, i32 345678912, i32 456789123, i32 0>, ptr %tmp114, align 4
@@ -36,7 +47,6 @@ define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
   %tmp1739 = shl i32 %tmp1738, 22
   %tmp1740 = xor i32 %tmp1739, %tmp1738
   store i32 %tmp1740, ptr %tmp1683, align 4
-; CHECK: store i32 310393545, ptr %tmp114, align 4
   %tmp1756 = getelementptr inbounds %2, ptr %tmp1, i64 0, i32 1
   %tmp1761 = load i32, ptr %tmp1756, align 4
   %tmp1766 = shl i32 %tmp1761, 5
@@ -64,7 +74,6 @@ define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
   %tmp1812 = shl i32 %tmp1811, 22
   %tmp1813 = xor i32 %tmp1812, %tmp1811
   store i32 %tmp1813, ptr %tmp1756, align 4
-; CHECK: store i32 -383584258, ptr %tmp114, align 4
   %tmp2645 = getelementptr inbounds %2, ptr %tmp1, i64 0, i32 1
   %tmp2650 = load i32, ptr %tmp2645, align 4
   %tmp2655 = shl i32 %tmp2650, 5
@@ -92,6 +101,5 @@ define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
   %tmp2701 = shl i32 %tmp2700, 22
   %tmp2702 = xor i32 %tmp2701, %tmp2700
   store i32 %tmp2702, ptr %tmp2645, align 4
-; CHECK: store i32 -57163022, ptr %tmp114, align 4
   ret i32 0
 }
diff --git a/llvm/test/Transforms/NewGVN/fpmath.ll b/llvm/test/Transforms/NewGVN/fpmath.ll
index e8cec8a..d936c01 100644
--- a/llvm/test/Transforms/NewGVN/fpmath.ll
+++ b/llvm/test/Transforms/NewGVN/fpmath.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 define double @test1(double %x, double %y) {
-; CHECK: @test1(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y
-; CHECK-NOT: fpmath
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test1(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !0
   %add2 = fadd double %x, %y
   %foo = fadd double %add1, %add2
@@ -12,9 +15,12 @@ define double @test1(double %x, double %y) {
 }
 
 define double @test2(double %x, double %y) {
-; CHECK: @test2(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y, !fpmath ![[MD0:[0-9]+]]
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test2(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]], !fpmath [[META0:![0-9]+]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !0
   %add2 = fadd double %x, %y, !fpmath !0
   %foo = fadd double %add1, %add2
@@ -22,9 +28,12 @@ define double @test2(double %x, double %y) {
 }
 
 define double @test3(double %x, double %y) {
-; CHECK: @test3(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y, !fpmath ![[MD1:[0-9]+]]
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test3(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]], !fpmath [[META1:![0-9]+]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !1
   %add2 = fadd double %x, %y, !fpmath !0
   %foo = fadd double %add1, %add2
@@ -32,9 +41,12 @@ define double @test3(double %x, double %y) {
 }
 
 define double @test4(double %x, double %y) {
-; CHECK: @test4(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y, !fpmath ![[MD1]]
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test4(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]], !fpmath [[META1]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !0
   %add2 = fadd double %x, %y, !fpmath !1
   %foo = fadd double %add1, %add2
@@ -42,17 +54,22 @@ define double @test4(double %x, double %y) {
 }
 
 define double @test5(double %x) {
-; CHECK: @test5(double %x)
-; CHECK: %neg1 = fneg double %x, !fpmath ![[MD1]]
-; CHECK: %foo = fadd double %neg1, %neg1
+; CHECK-LABEL: define double @test5(
+; CHECK-SAME: double [[X:%.*]]) {
+; CHECK-NEXT:    [[NEG1:%.*]] = fneg double [[X]], !fpmath [[META1]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[NEG1]], [[NEG1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %neg1 = fneg double %x, !fpmath !0
   %neg2 = fneg double %x, !fpmath !1
   %foo = fadd double %neg1, %neg2
   ret double %foo
 }
 
-; CHECK: ![[MD0]] = !{float 5.000000e+00}
-; CHECK: ![[MD1]] = !{float 2.500000e+00}
 
 !0 = !{ float 5.0 }
 !1 = !{ float 2.5 }
+;.
+; CHECK: [[META0]] = !{float 5.000000e+00}
+; CHECK: [[META1]] = !{float 2.500000e+00}
+;.
diff --git a/llvm/test/Transforms/NewGVN/funclet.ll b/llvm/test/Transforms/NewGVN/funclet.ll
index 3df3f94..8c1cbd6 100644
--- a/llvm/test/Transforms/NewGVN/funclet.ll
+++ b/llvm/test/Transforms/NewGVN/funclet.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc"
@@ -8,13 +9,35 @@ target triple = "i686-pc-windows-msvc"
 @"_TI1?AUA@@" = external constant %eh.ThrowInfo
 
 define i8 @f() personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: define i8 @f() personality ptr @__CxxFrameHandler3 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[C:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[B]], align 1
+; CHECK-NEXT:    store i8 13, ptr [[C]], align 1
+; CHECK-NEXT:    invoke void @_CxxThrowException(ptr [[B]], ptr nonnull @"_TI1?AUA@@")
+; CHECK-NEXT:            to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK:       catch.dispatch:
+; CHECK-NEXT:    [[CS1:%.*]] = catchswitch within none [label %catch] unwind to caller
+; CHECK:       catch:
+; CHECK-NEXT:    [[CATCHPAD:%.*]] = catchpad within [[CS1]] [ptr null, i32 64, ptr null]
+; CHECK-NEXT:    store i8 5, ptr [[B]], align 1
+; CHECK-NEXT:    catchret from [[CATCHPAD]] to label [[TRY_CONT:%.*]]
+; CHECK:       try.cont:
+; CHECK-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[B]], align 1
+; CHECK-NEXT:    [[LOAD_C:%.*]] = load i8, ptr [[C]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD_B]], [[LOAD_C]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+; CHECK:       unreachable:
+; CHECK-NEXT:    unreachable
+;
 entry:
   %b = alloca i8
   %c = alloca i8
   store i8 42, ptr %b
   store i8 13, ptr %c
   invoke void @_CxxThrowException(ptr %b, ptr nonnull @"_TI1?AUA@@")
-          to label %unreachable unwind label %catch.dispatch
+  to label %unreachable unwind label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %entry
   %cs1 = catchswitch within none [label %catch] unwind to caller
@@ -33,11 +56,6 @@ try.cont:                                         ; preds = %catch
 unreachable:                                      ; preds = %entry
   unreachable
 }
-; CHECK-LABEL: define i8 @f(
-; CHECK:       %[[load_b:.*]] = load i8, ptr %b
-; CHECK-NEXT:  %[[load_c:.*]] = load i8, ptr %c
-; CHECK-NEXT:  %[[add:.*]] = add i8 %[[load_b]], %[[load_c]]
-; CHECK-NEXT:  ret i8 %[[add]]
 
 declare i32 @__CxxFrameHandler3(...)
 
diff --git a/llvm/test/Transforms/NewGVN/int_sideeffect.ll b/llvm/test/Transforms/NewGVN/int_sideeffect.ll
index f715d02..a2c54bd 100644
--- a/llvm/test/Transforms/NewGVN/int_sideeffect.ll
+++ b/llvm/test/Transforms/NewGVN/int_sideeffect.ll
@@ -1,27 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S < %s -passes=newgvn | FileCheck %s
 
 declare void @llvm.sideeffect()
 
 ; Store-to-load forwarding across a @llvm.sideeffect.
 
-; CHECK-LABEL: s2l
-; CHECK-NOT: load
 define float @s2l(ptr %p) {
-    store float 0.0, ptr %p
-    call void @llvm.sideeffect()
-    %t = load float, ptr %p
-    ret float %t
+; CHECK-LABEL: define float @s2l(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  store float 0.0, ptr %p
+  call void @llvm.sideeffect()
+  %t = load float, ptr %p
+  ret float %t
 }
 
 ; Redundant load elimination across a @llvm.sideeffect.
 
-; CHECK-LABEL: rle
-; CHECK: load
-; CHECK-NOT: load
 define float @rle(ptr %p) {
-    %r = load float, ptr %p
-    call void @llvm.sideeffect()
-    %s = load float, ptr %p
-    %t = fadd float %r, %s
-    ret float %t
+; CHECK-LABEL: define float @rle(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = load float, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[T:%.*]] = fadd float [[R]], [[R]]
+; CHECK-NEXT:    ret float [[T]]
+;
+  %r = load float, ptr %p
+  call void @llvm.sideeffect()
+  %s = load float, ptr %p
+  %t = fadd float %r, %s
+  ret float %t
 }
diff --git a/llvm/test/Transforms/NewGVN/invariant.group.ll b/llvm/test/Transforms/NewGVN/invariant.group.ll
index 81e733f..7c14059 100644
--- a/llvm/test/Transforms/NewGVN/invariant.group.ll
+++ b/llvm/test/Transforms/NewGVN/invariant.group.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 %struct.A = type { ptr }
@@ -6,86 +7,131 @@
 
 @unknownPtr = external global i8
 
-; CHECK-LABEL: define i8 @simple() {
 define i8 @simple() {
+; CHECK-LABEL: define i8 @simple() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0:![0-9]+]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-
-    %a = load i8, ptr %ptr, !invariant.group !0
-    %b = load i8, ptr %ptr, !invariant.group !0
-    %c = load i8, ptr %ptr, !invariant.group !0
-; CHECK: ret i8 42
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+
+  %a = load i8, ptr %ptr, !invariant.group !0
+  %b = load i8, ptr %ptr, !invariant.group !0
+  %c = load i8, ptr %ptr, !invariant.group !0
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @optimizable1() {
 define i8 @optimizable1() {
+; CHECK-LABEL: define i8 @optimizable1() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[PTR]])
+; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-    
-    call void @foo(ptr %ptr2); call to use %ptr2
-; CHECK: ret i8 42
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+
+  call void @foo(ptr %ptr2); call to use %ptr2
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @optimizable2() {
 define i8 @optimizable2() {
+; CHECK-LABEL: define i8 @optimizable2() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    store i8 13, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @bar(i8 13)
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-    
-    store i8 13, ptr %ptr ; can't use this store with invariant.group
-    %a = load i8, ptr %ptr 
-    call void @bar(i8 %a) ; call to use %a
-    
-    call void @foo(ptr %ptr)
-    %b = load i8, ptr %ptr, !invariant.group !0
-    
-; CHECK: ret i8 42
-    ret i8 %b
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+
+  store i8 13, ptr %ptr ; can't use this store with invariant.group
+  %a = load i8, ptr %ptr
+  call void @bar(i8 %a) ; call to use %a
+
+  call void @foo(ptr %ptr)
+  %b = load i8, ptr %ptr, !invariant.group !0
+
+  ret i8 %b
 }
 
-; CHECK-LABEL: define i1 @proveEqualityForStrip(
 define i1 @proveEqualityForStrip(ptr %a) {
+; CHECK-LABEL: define i1 @proveEqualityForStrip(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret i1 true
+;
   %b1 = call ptr @llvm.strip.invariant.group.p0(ptr %a)
-; CHECK-NOT: llvm.strip.invariant.group
   %b2 = call ptr @llvm.strip.invariant.group.p0(ptr %a)
   %r = icmp eq ptr %b1, %b2
-; CHECK: ret i1 true
   ret i1 %r
 }
 
-; CHECK-LABEL: define i8 @unoptimizable1() {
 define i8 @unoptimizable1() {
+; CHECK-LABEL: define i8 @unoptimizable1() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-; CHECK: ret i8 %a
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+  ret i8 %a
 }
 
 ; NewGVN doesn't support assumes.
-; CHECK-LABEL: define void @indirectLoads() {
 define void @indirectLoads() {
+; CHECK-LABEL: define void @indirectLoads() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @getPointer(ptr null)
+; CHECK-NEXT:    call void @_ZN1AC1Ev(ptr [[CALL]])
+; CHECK-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[CMP_VTABLES:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_VTABLES]])
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT:    call void [[TMP0]](ptr [[CALL]])
+; CHECK-NEXT:    [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE2]], align 8
+; CHECK-NEXT:    call void [[TMP1]](ptr [[CALL]])
+; CHECK-NEXT:    [[VTABLE4:%.*]] = load ptr, ptr [[CALL]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[VTABLE4]], align 8
+; CHECK-NEXT:    call void [[TMP2]](ptr [[CALL]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT:    call void [[TMP3]](ptr [[CALL]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %a = alloca ptr, align 8
-  
-  %call = call ptr @getPointer(ptr null) 
+
+  %call = call ptr @getPointer(ptr null)
   call void @_ZN1AC1Ev(ptr %call)
-  
-; CHECK: %vtable = load {{.*}} !invariant.group
+
   %vtable = load ptr, ptr %call, align 8, !invariant.group !0
   %cmp.vtables = icmp eq ptr %vtable, getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
   call void @llvm.assume(i1 %cmp.vtables)
-  
+
   store ptr %call, ptr %a, align 8
   %0 = load ptr, ptr %a, align 8
 
@@ -98,36 +144,45 @@ entry:
 ; FIXME: call void @_ZN1A3fooEv(
   %vtable2 = load ptr, ptr %2, align 8, !invariant.group !0
   %3 = load ptr, ptr %vtable2, align 8
-  
+
   call void %3(ptr %2)
   %4 = load ptr, ptr %a, align 8
-  
+
   %vtable4 = load ptr, ptr %4, align 8, !invariant.group !0
   %5 = load ptr, ptr %vtable4, align 8
 ; FIXME: call void @_ZN1A3fooEv(
   call void %5(ptr %4)
- 
+
   %vtable5 = load ptr, ptr %call, align 8, !invariant.group !0
   %6 = load ptr, ptr %vtable5, align 8
 ; FIXME: call void @_ZN1A3fooEv(
   call void %6(ptr %4)
-  
+
   ret void
 }
 
 ; NewGVN won't CSE loads with different pointee types.
-; CHECK-LABEL: define void @combiningBitCastWithLoad() {
 define void @combiningBitCastWithLoad() {
+; CHECK-LABEL: define void @combiningBitCastWithLoad() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @getPointer(ptr null)
+; CHECK-NEXT:    call void @_ZN1AC1Ev(ptr [[CALL]])
+; CHECK-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT:    call void [[TMP0]](ptr [[CALL]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %a = alloca ptr, align 8
-  
-  %call = call ptr @getPointer(ptr null) 
+
+  %call = call ptr @getPointer(ptr null)
   call void @_ZN1AC1Ev(ptr %call)
-  
-; CHECK: %vtable = load {{.*}} !invariant.group
+
   %vtable = load ptr, ptr %call, align 8, !invariant.group !0
   %cmp.vtables = icmp eq ptr %vtable, getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
-  
+
   store ptr %call, ptr %a, align 8
 ; FIXME-NOT: !invariant.group
   %0 = load ptr, ptr %a, align 8
@@ -139,173 +194,245 @@ entry:
   ret void
 }
 
-; CHECK-LABEL:define void @loadCombine() {
 define void @loadCombine() {
+; CHECK-LABEL: define void @loadCombine() {
+; CHECK-NEXT:  enter:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[A]])
+; CHECK-NEXT:    call void @bar(i8 [[A]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[A:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %a = load i8, ptr %ptr, !invariant.group !0
-; CHECK-NOT: load
   %b = load i8, ptr %ptr, !invariant.group !0
-; CHECK: call void @bar(i8 %[[A]])
   call void @bar(i8 %a)
-; CHECK: call void @bar(i8 %[[A]])
   call void @bar(i8 %b)
   ret void
 }
 
-; CHECK-LABEL: define void @loadCombine1() {
 define void @loadCombine1() {
+; CHECK-LABEL: define void @loadCombine1() {
+; CHECK-NEXT:  enter:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[C:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[D:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %c = load i8, ptr %ptr
-; CHECK-NOT: load
   %d = load i8, ptr %ptr, !invariant.group !0
-; CHECK: call void @bar(i8 %[[D]])
   call void @bar(i8 %c)
-; CHECK: call void @bar(i8 %[[D]])
   call void @bar(i8 %d)
   ret void
 }
 
-; CHECK-LABEL: define void @loadCombine2() {    
 define void @loadCombine2() {
+; CHECK-LABEL: define void @loadCombine2() {
+; CHECK-NEXT:  enter:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[E:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[E:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %e = load i8, ptr %ptr, !invariant.group !0
-; CHECK-NOT: load
   %f = load i8, ptr %ptr
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %e)
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %f)
   ret void
 }
 
-; CHECK-LABEL: define void @loadCombine3() {
 define void @loadCombine3() {
+; CHECK-LABEL: define void @loadCombine3() {
+; CHECK-NEXT:  enter:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[E:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[E:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %e = load i8, ptr %ptr, !invariant.group !0
-; CHECK-NOT: load
   %f = load i8, ptr %ptr, !invariant.group !0
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %e)
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %f)
   ret void
 }
 
-; CHECK-LABEL: define i8 @unoptimizable2() {
 define i8 @unoptimizable2() {
+; CHECK-LABEL: define i8 @unoptimizable2() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr
-    call void @foo(ptr %ptr)
-    %b = load i8, ptr %ptr, !invariant.group !0
-    
-; CHECK: ret i8 %a
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr
+  call void @foo(ptr %ptr)
+  %b = load i8, ptr %ptr, !invariant.group !0
+
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @unoptimizable3() {
 define i8 @unoptimizable3() {
+; CHECK-LABEL: define i8 @unoptimizable3() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @getPointer(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR2]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    %ptr2 = call ptr @getPointer(ptr %ptr)
-    %a = load i8, ptr %ptr2, !invariant.group !0
-    
-; CHECK: ret i8 %a
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  %ptr2 = call ptr @getPointer(ptr %ptr)
+  %a = load i8, ptr %ptr2, !invariant.group !0
+
+  ret i8 %a
 }
 
 ; NewGVN cares about the launder for some reason.
-; CHECK-LABEL: define i8 @optimizable4() {
 define i8 @optimizable4() {
+; CHECK-LABEL: define i8 @optimizable4() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR2]], align 1
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr
-    %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
+  %ptr = alloca i8
+  store i8 42, ptr %ptr
+  %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
 ; FIXME-NOT: load
-    %a = load i8, ptr %ptr2
-    
+  %a = load i8, ptr %ptr2
+
 ; FIXME: ret i8 42
-    ret i8 %a
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @volatile1() {
 define i8 @volatile1() {
+; CHECK-LABEL: define i8 @volatile1() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[B:%.*]] = load volatile i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @bar(i8 [[B]])
+; CHECK-NEXT:    [[C:%.*]] = load volatile i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-    %b = load volatile i8, ptr %ptr
-; CHECK: call void @bar(i8 %b)
-    call void @bar(i8 %b)
-
-    %c = load volatile i8, ptr %ptr, !invariant.group !0
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+  %b = load volatile i8, ptr %ptr
+  call void @bar(i8 %b)
+
+  %c = load volatile i8, ptr %ptr, !invariant.group !0
 ; We might be able to optimize this, but nobody cares
-; CHECK: call void @bar(i8 %c)
-    call void @bar(i8 %c)
-; CHECK: ret i8 42
-    ret i8 %a
+  call void @bar(i8 %c)
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @volatile2() {
 define i8 @volatile2() {
+; CHECK-LABEL: define i8 @volatile2() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[B:%.*]] = load volatile i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @bar(i8 [[B]])
+; CHECK-NEXT:    [[C:%.*]] = load volatile i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-    %b = load volatile i8, ptr %ptr
-; CHECK: call void @bar(i8 %b)
-    call void @bar(i8 %b)
-
-    %c = load volatile i8, ptr %ptr, !invariant.group !0
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+  %b = load volatile i8, ptr %ptr
+  call void @bar(i8 %b)
+
+  %c = load volatile i8, ptr %ptr, !invariant.group !0
 ; We might be able to optimize this, but nobody cares
-; CHECK: call void @bar(i8 %c)
-    call void @bar(i8 %c)
-; CHECK: ret i8 42
-    ret i8 %a
+  call void @bar(i8 %c)
+  ret i8 %a
 }
 
-; CHECK-LABEL: define void @fun() {
 define void @fun() {
+; CHECK-LABEL: define void @fun() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    call void @bar(i8 42)
+; CHECK-NEXT:    ret void
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
 
-    %a = load i8, ptr %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
-; CHECK: call void @bar(i8 42)
-    call void @bar(i8 %a)
+  %a = load i8, ptr %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
+  call void @bar(i8 %a)
 
-    ret void
+  ret void
 }
 
 ; FIXME: NewGVN doesn't run instsimplify on a load from a vtable definition?
 ; This test checks if invariant.group understands gep with zeros
-; CHECK-LABEL: define void @testGEP0() {
 define void @testGEP0() {
+; CHECK-LABEL: define void @testGEP0() {
+; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 8
+; CHECK-NEXT:    store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr [[A]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr nonnull dereferenceable(8) [[A]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @unknownPtr, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[_Z1GR1A_EXIT:%.*]], label [[TMP3:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), align 8
+; CHECK-NEXT:    call void [[TMP4]](ptr nonnull [[A]])
+; CHECK-NEXT:    br label [[_Z1GR1A_EXIT]]
+; CHECK:       _Z1gR1A.exit:
+; CHECK-NEXT:    ret void
+;
   %a = alloca %struct.A, align 8
   store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr %a, align 8, !invariant.group !0
-; CHECK: call void @_ZN1A3fooEv(ptr nonnull dereferenceable(8) %a)
   call void @_ZN1A3fooEv(ptr nonnull dereferenceable(8) %a) ; This call may change vptr
   %1 = load i8, ptr @unknownPtr, align 4
   %2 = icmp eq i8 %1, 0
@@ -325,54 +452,93 @@ _Z1gR1A.exit:                                     ; preds = %0, %3
 ; Check if no optimizations are performed with global pointers.
 ; FIXME: we could do the optimizations if we would check if dependency comes
 ; from the same function.
-; CHECK-LABEL: define void @testGlobal() {
 define void @testGlobal() {
-; CHECK:  %a = load i8, ptr @unknownPtr, align 1, !invariant.group !0
-   %a = load i8, ptr @unknownPtr, !invariant.group !0
-   call void @foo2(ptr @unknownPtr, i8 %a)
-; CHECK:  %1 = load i8, ptr @unknownPtr, align 1, !invariant.group !0
-   %1 = load i8, ptr @unknownPtr, !invariant.group !0
-   call void @bar(i8 %1)
-
-   call void @fooBit(ptr @unknownPtr, i1 1)
+; CHECK-LABEL: define void @testGlobal() {
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo2(ptr @unknownPtr, i8 [[A]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[TMP1]])
+; CHECK-NEXT:    call void @fooBit(ptr @unknownPtr, i1 true)
+; CHECK-NEXT:    [[TMP2:%.*]] = load i1, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @fooBit(ptr @unknownPtr, i1 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load i1, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @fooBit(ptr @unknownPtr, i1 [[TMP3]])
+; CHECK-NEXT:    ret void
+;
+  %a = load i8, ptr @unknownPtr, !invariant.group !0
+  call void @foo2(ptr @unknownPtr, i8 %a)
+  %1 = load i8, ptr @unknownPtr, !invariant.group !0
+  call void @bar(i8 %1)
+
+  call void @fooBit(ptr @unknownPtr, i1 1)
 ; Adding regex because of canonicalization of bitcasts
-; CHECK: %2 = load i1, ptr {{.*}}, !invariant.group !0
-   %2 = load i1, ptr @unknownPtr, !invariant.group !0
-   call void @fooBit(ptr @unknownPtr, i1 %2)
-; CHECK:  %3 = load i1, ptr {{.*}}, !invariant.group !0
-   %3 = load i1, ptr @unknownPtr, !invariant.group !0
-   call void @fooBit(ptr @unknownPtr, i1 %3)
-   ret void
+  %2 = load i1, ptr @unknownPtr, !invariant.group !0
+  call void @fooBit(ptr @unknownPtr, i1 %2)
+  %3 = load i1, ptr @unknownPtr, !invariant.group !0
+  call void @fooBit(ptr @unknownPtr, i1 %3)
+  ret void
 }
 
 ; Might be similar to above where NewGVN doesn't handle loads of different types from the same location.
 ; Not super important anyway.
-; CHECK-LABEL: define void @testTrunc() {
 define void @testTrunc() {
-   %a = alloca i8
-   call void @foo(ptr %a)
-; CHECK:  %b = load i8, ptr %a, align 1, !invariant.group !0
-   %b = load i8, ptr %a, !invariant.group !0
-   call void @foo2(ptr %a, i8 %b)
-
-   %1 = load i8, ptr %a, !invariant.group !0
-; CHECK: call void @bar(i8 %b)
-   call void @bar(i8 %1)
-
-   call void @fooBit(ptr %a, i1 1)
+; CHECK-LABEL: define void @testTrunc() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @foo(ptr [[A]])
+; CHECK-NEXT:    [[B:%.*]] = load i8, ptr [[A]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo2(ptr [[A]], i8 [[B]])
+; CHECK-NEXT:    call void @bar(i8 [[B]])
+; CHECK-NEXT:    call void @fooBit(ptr [[A]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[A]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @fooBit(ptr [[A]], i1 [[TMP1]])
+; CHECK-NEXT:    call void @fooBit(ptr [[A]], i1 [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i8
+  call void @foo(ptr %a)
+  %b = load i8, ptr %a, !invariant.group !0
+  call void @foo2(ptr %a, i8 %b)
+
+  %1 = load i8, ptr %a, !invariant.group !0
+  call void @bar(i8 %1)
+
+  call void @fooBit(ptr %a, i1 1)
 ; FIXME: %1 = trunc i8 %b to i1
-   %2 = load i1, ptr %a, !invariant.group !0
+  %2 = load i1, ptr %a, !invariant.group !0
 ; FIXME-NEXT: call void @fooBit(ptr %a, i1 %1)
-   call void @fooBit(ptr %a, i1 %2)
-   %3 = load i1, ptr %a, !invariant.group !0
+  call void @fooBit(ptr %a, i1 %2)
+  %3 = load i1, ptr %a, !invariant.group !0
 ; FIXME-NEXT: call void @fooBit(ptr %a, i1 %1)
-   call void @fooBit(ptr %a, i1 %3)
-   ret void
+  call void @fooBit(ptr %a, i1 %3)
+  ret void
 }
 
 ; See comment in @testGEP0 on what NewGVN is lacking.
-; CHECK-LABEL: define void @handling_loops()
 define void @handling_loops() {
+; CHECK-LABEL: define void @handling_loops() {
+; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 8
+; CHECK-NEXT:    store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr [[A]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @unknownPtr, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[DOTLR_PH_I:%.*]], label [[_Z2G2R1A_EXIT:%.*]]
+; CHECK:       .lr.ph.i:
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i8 [[TMP1]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[DOT_CRIT_EDGE_PREHEADER:%.*]], label [[_Z2G2R1A_EXIT]]
+; CHECK:       ._crit_edge.preheader:
+; CHECK-NEXT:    br label [[DOT_CRIT_EDGE:%.*]]
+; CHECK:       ._crit_edge:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i8 [ [[TMP6:%.*]], [[DOT_CRIT_EDGE]] ], [ 1, [[DOT_CRIT_EDGE_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), align 8
+; CHECK-NEXT:    call void [[TMP5]](ptr nonnull [[A]])
+; CHECK-NEXT:    [[TMP6]] = add nuw nsw i8 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr @unknownPtr, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[DOT_CRIT_EDGE]], label [[_Z2G2R1A_EXIT_LOOPEXIT:%.*]]
+; CHECK:       _Z2g2R1A.exit.loopexit:
+; CHECK-NEXT:    br label [[_Z2G2R1A_EXIT]]
+; CHECK:       _Z2g2R1A.exit:
+; CHECK-NEXT:    ret void
+;
   %a = alloca %struct.A, align 8
   store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr %a, align 8, !invariant.group !0
   %1 = load i8, ptr @unknownPtr, align 4
@@ -424,3 +590,6 @@ declare void @llvm.assume(i1 %cmp.vtables) #0
 
 attributes #0 = { nounwind }
 !0 = !{}
+;.
+; CHECK: [[META0]] = !{}
+;.
diff --git a/llvm/test/Transforms/NewGVN/invariant.start.ll b/llvm/test/Transforms/NewGVN/invariant.start.ll
index 100b79f..9bf1c55 100644
--- a/llvm/test/Transforms/NewGVN/invariant.start.ll
+++ b/llvm/test/Transforms/NewGVN/invariant.start.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; Test to make sure llvm.invariant.start calls are not treated as clobbers.
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
@@ -7,10 +8,12 @@ declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
 
 ; We forward store to the load across the invariant.start intrinsic
 define i8 @forward_store() {
-; CHECK-LABEL: @forward_store
-; CHECK: call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
-; CHECK-NOT: load
-; CHECK: ret i8 0
+; CHECK-LABEL: define i8 @forward_store() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 0, ptr [[A]], align 1
+; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    ret i8 0
+;
   %a = alloca i8
   store i8 0, ptr %a
   %i = call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
@@ -23,10 +26,18 @@ declare i8 @dummy(ptr nocapture) nounwind readonly
 ; We forward store to the load in the non-local analysis case,
 ; i.e. invariant.start is in another basic block.
 define i8 @forward_store_nonlocal(i1 %cond) {
-; CHECK-LABEL: forward_store_nonlocal
-; CHECK: call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
-; CHECK: ret i8 0
-; CHECK: ret i8 %val
+; CHECK-LABEL: define i8 @forward_store_nonlocal(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 0, ptr [[A]], align 1
+; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    br i1 [[COND]], label [[LOADBLOCK:%.*]], label [[EXIT:%.*]]
+; CHECK:       loadblock:
+; CHECK-NEXT:    ret i8 0
+; CHECK:       exit:
+; CHECK-NEXT:    [[VAL:%.*]] = call i8 @dummy(ptr [[A]])
+; CHECK-NEXT:    ret i8 [[VAL]]
+;
   %a = alloca i8
   store i8 0, ptr %a
   %i = call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
@@ -43,12 +54,14 @@ exit:
 
 ; We should not value forward %foo to the invariant.end corresponding to %bar.
 define i8 @forward_store1() {
-; CHECK-LABEL: forward_store1
-; CHECK: %foo = call ptr @llvm.invariant.start.p0
-; CHECK-NOT: load
-; CHECK: %bar = call ptr @llvm.invariant.start.p0
-; CHECK: call void @llvm.invariant.end.p0(ptr %bar, i64 1, ptr %a)
-; CHECK: ret i8 0
+; CHECK-LABEL: define i8 @forward_store1() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 0, ptr [[A]], align 1
+; CHECK-NEXT:    [[FOO:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    [[BAR:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    call void @llvm.invariant.end.p0(ptr [[BAR]], i64 1, ptr [[A]])
+; CHECK-NEXT:    ret i8 0
+;
   %a = alloca i8
   store i8 0, ptr %a
   %foo = call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
diff --git a/llvm/test/Transforms/NewGVN/lifetime-simple.ll b/llvm/test/Transforms/NewGVN/lifetime-simple.ll
index 5d31101..55e4611 100644
--- a/llvm/test/Transforms/NewGVN/lifetime-simple.ll
+++ b/llvm/test/Transforms/NewGVN/lifetime-simple.ll
@@ -1,12 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
 define i8 @test(ptr %P) nounwind {
-; CHECK: lifetime.start
-; CHECK-NOT: load
-; CHECK: lifetime.end
+; CHECK-LABEL: define i8 @test(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[P]])
+; CHECK-NEXT:    store i8 1, ptr [[P]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[P]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
 entry:
   call void @llvm.lifetime.start.p0(i64 32, ptr %P)
   %0 = load i8, ptr %P
diff --git a/llvm/test/Transforms/NewGVN/load-constant-mem.ll b/llvm/test/Transforms/NewGVN/load-constant-mem.ll
index 06439c59..ae91147 100644
--- a/llvm/test/Transforms/NewGVN/load-constant-mem.ll
+++ b/llvm/test/Transforms/NewGVN/load-constant-mem.ll
@@ -7,14 +7,14 @@ define i32 @test(ptr %p, i32 %i) nounwind {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P:%.*]] = getelementptr [4 x i32], ptr @G, i32 0, i32 [[I:%.*]]
-; CHECK-NEXT:    store i8 4, ptr [[P:%.*]]
+; CHECK-NEXT:    store i8 4, ptr [[P1:%.*]], align 1
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %P = getelementptr [4 x i32], ptr @G, i32 0, i32 %i
-  %A = load i32, ptr %P
+  %p.i = getelementptr [4 x i32], ptr @G, i32 0, i32 %i
+  %A = load i32, ptr %p.i
   store i8 4, ptr %p
-  %B = load i32, ptr %P
+  %B = load i32, ptr %p.i
   %C = sub i32 %A, %B
   ret i32 %C
 }
diff --git a/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll b/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll
index 74cb700..3ca9b9e 100644
--- a/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll
+++ b/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll
@@ -1,12 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 ; Check that an unreachable predecessor to a PHI node doesn't cause a crash.
 ; PR21625.
 
 define i32 @f(ptr %f) {
-; CHECK: bb0:
+; CHECK-LABEL: define i32 @f(
+; CHECK-SAME: ptr [[F:%.*]]) {
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br i1 false, label [[BB1:%.*]], label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
 ; Load should be removed, since it's ignored.
-; CHECK-NEXT: br label
 bb0:
   %bar = load ptr, ptr %f
   br label %bb2
diff --git a/llvm/test/Transforms/NewGVN/loadforward.ll b/llvm/test/Transforms/NewGVN/loadforward.ll
index d8a9022..85ceafd 100644
--- a/llvm/test/Transforms/NewGVN/loadforward.ll
+++ b/llvm/test/Transforms/NewGVN/loadforward.ll
@@ -9,8 +9,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ;; Test that we forward the first store to the second load
 define i16 @bazinga() {
 ; CHECK-LABEL: @bazinga(
-; CHECK-NEXT:    [[_TMP10:%.*]] = load i16, ptr getelementptr inbounds (%rec11, ptr @str, i64 0, i32 1)
-; CHECK-NEXT:    store i16 [[_TMP10]], ptr @str
+; CHECK-NEXT:    [[_TMP10:%.*]] = load i16, ptr getelementptr inbounds ([[REC11:%.*]], ptr @str, i64 0, i32 1), align 2
+; CHECK-NEXT:    store i16 [[_TMP10]], ptr @str, align 2
 ; CHECK-NEXT:    [[_TMP15:%.*]] = icmp eq i16 [[_TMP10]], 3
 ; CHECK-NEXT:    [[_TMP16:%.*]] = select i1 [[_TMP15]], i16 1, i16 0
 ; CHECK-NEXT:    br label [[BB1:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/malloc-load-removal.ll b/llvm/test/Transforms/NewGVN/malloc-load-removal.ll
index b487acf..3e11d92 100644
--- a/llvm/test/Transforms/NewGVN/malloc-load-removal.ll
+++ b/llvm/test/Transforms/NewGVN/malloc-load-removal.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S -passes=newgvn < %s | FileCheck %s
 ; PR13694
 
@@ -7,6 +8,17 @@ target triple = "x86_64-apple-macosx10.8.0"
 declare ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
 
 define noalias ptr @test1() nounwind uwtable ssp {
+; CHECK-LABEL: define noalias ptr @test1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 100) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i8 0, ptr [[CALL]], align 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
 entry:
   %call = tail call ptr @malloc(i64 100) nounwind
   %0 = load i8, ptr %call, align 1
@@ -20,14 +32,22 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret ptr %call
 
-; CHECK-LABEL: @test1(
-; CHECK-NOT: load
-; CHECK-NOT: icmp
 }
 
 declare ptr @_Znwm(i64) nounwind
 
 define noalias ptr @test2() nounwind uwtable ssp {
+; CHECK-LABEL: define noalias ptr @test2(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @_Znwm(i64 100) #[[ATTR2]]
+; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i8 0, ptr [[CALL]], align 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
 entry:
   %call = tail call ptr @_Znwm(i64 100) nounwind
   %0 = load i8, ptr %call, align 1
@@ -41,14 +61,22 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret ptr %call
 
-; CHECK-LABEL: @test2(
-; CHECK-NOT: load
-; CHECK-NOT: icmp
 }
 
 declare ptr @aligned_alloc(i64 allocalign, i64) nounwind allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
 
 define noalias ptr @test3() nounwind uwtable ssp {
+; CHECK-LABEL: define noalias ptr @test3(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @aligned_alloc(i64 256, i64 32) #[[ATTR2]]
+; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i8 0, ptr [[CALL]], align 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
 entry:
   %call = tail call ptr @aligned_alloc(i64 256, i64 32) nounwind
   %0 = load i8, ptr %call, align 32
@@ -62,7 +90,4 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret ptr %call
 
-; CHECK-LABEL: @test3(
-; CHECK-NOT: load
-; CHECK-NOT: icmp
 }
diff --git a/llvm/test/Transforms/NewGVN/memory-handling.ll b/llvm/test/Transforms/NewGVN/memory-handling.ll
index b1e1c4f..c72ff74 100644
--- a/llvm/test/Transforms/NewGVN/memory-handling.ll
+++ b/llvm/test/Transforms/NewGVN/memory-handling.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ;; This test is really dependent on propagating a lot of memory info around, but in the end, not
 ;; screwing up a single add.
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
@@ -20,6 +21,121 @@ declare ptr @__ctype_b_loc() local_unnamed_addr #1
 
 ; Function Attrs: nounwind uwtable
 define void @BuildMask(ptr nocapture readonly) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @BuildMask(
+; CHECK-SAME: ptr nocapture readonly [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 16 @alPhrase, i8 0, i64 416, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainMask, i8 0, i64 16, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainSign, i8 0, i64 16, i1 false)
+; CHECK-NEXT:    br label [[DOTSINK_SPLIT:%.*]]
+; CHECK:       .sink.split:
+; CHECK-NEXT:    [[DOT0:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP3:%.*]], [[TMP14:%.*]] ]
+; CHECK-NEXT:    [[DOTSINK:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP22:%.*]], [[TMP14]] ]
+; CHECK-NEXT:    store i32 [[DOTSINK]], ptr @cchPhraseLength, align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT:    br label [[TMP2:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[DOT1:%.*]] = phi ptr [ [[DOT0]], [[DOTSINK_SPLIT]] ], [ [[TMP3]], [[TMP6:%.*]] ]
+; CHECK-NEXT:    [[TMP3]] = getelementptr inbounds i8, ptr [[DOT1]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[DOT1]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[DOTPREHEADER_PREHEADER:%.*]], label [[TMP6]]
+; CHECK:       .preheader.preheader:
+; CHECK-NEXT:    br label [[DOTPREHEADER:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call ptr @__ctype_b_loc() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[TMP10]], align 2, !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT:    [[TMP12:%.*]] = and i16 [[TMP11]], 1024
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i16 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TMP13]], label [[TMP2]], label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    [[TMP15:%.*]] = sext i8 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP16:%.*]] = tail call i32 @tolower(i32 [[TMP15]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], -97
+; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 16, !tbaa [[TBAA9:![0-9]+]]
+; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 1
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[TMP19]], align 16, !tbaa [[TBAA9]]
+; CHECK-NEXT:    [[TMP22]] = add nsw i32 [[DOTSINK]], 1
+; CHECK-NEXT:    br label [[DOTSINK_SPLIT]]
+; CHECK:       .preheader:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[DOTPREHEADER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP57:%.*]] ]
+; CHECK-NEXT:    [[DOT04961:%.*]] = phi i32 [ [[DOT2:%.*]], [[TMP57]] ], [ 0, [[DOTPREHEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[DOT05160:%.*]] = phi i32 [ [[DOT253:%.*]], [[TMP57]] ], [ 0, [[DOTPREHEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 0
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[TBAA9]]
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP24]], 0
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [26 x i32], ptr @auGlobalFrequency, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    br i1 [[TMP25]], label [[TMP27:%.*]], label [[TMP28:%.*]]
+; CHECK:       27:
+; CHECK-NEXT:    store i32 -1, ptr [[TMP26]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT:    br label [[TMP57]]
+; CHECK:       28:
+; CHECK-NEXT:    store i32 0, ptr [[TMP26]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT:    br i1 false, label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
+; CHECK:       .lr.ph.preheader:
+; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
+; CHECK:       .lr.ph:
+; CHECK-NEXT:    [[DOT04658:%.*]] = phi i64 [ [[TMP31:%.*]], [[DOTLR_PH]] ], [ 1, [[DOTLR_PH_PREHEADER]] ]
+; CHECK-NEXT:    [[DOT04857:%.*]] = phi i32 [ [[TMP30:%.*]], [[DOTLR_PH]] ], [ 1, [[DOTLR_PH_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP30]] = add nuw nsw i32 [[DOT04857]], 1
+; CHECK-NEXT:    [[TMP31]] = shl i64 [[DOT04658]], 1
+; CHECK-NEXT:    [[TMP32:%.*]] = icmp ult i64 [[TMP29]], [[TMP31]]
+; CHECK-NEXT:    br i1 [[TMP32]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[DOTLR_PH]]
+; CHECK:       ._crit_edge.loopexit:
+; CHECK-NEXT:    br label [[DOT_CRIT_EDGE]]
+; CHECK:       ._crit_edge:
+; CHECK-NEXT:    [[DOT048_LCSSA:%.*]] = phi i32 [ poison, [[TMP28]] ], [ [[TMP30]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
+; CHECK-NEXT:    [[DOT046_LCSSA:%.*]] = phi i64 [ poison, [[TMP28]] ], [ [[TMP31]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
+; CHECK-NEXT:    [[TMP33:%.*]] = add nsw i32 [[DOT048_LCSSA]], [[DOT04961]]
+; CHECK-NEXT:    [[TMP34:%.*]] = icmp ugt i32 [[TMP33]], 64
+; CHECK-NEXT:    br i1 [[TMP34]], label [[TMP35:%.*]], label [[TMP39:%.*]]
+; CHECK:       35:
+; CHECK-NEXT:    [[TMP36:%.*]] = add i32 [[DOT05160]], 1
+; CHECK-NEXT:    [[TMP37:%.*]] = icmp ugt i32 [[TMP36]], 1
+; CHECK-NEXT:    br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39]]
+; CHECK:       38:
+; CHECK-NEXT:    tail call void @Fatal(ptr @.str.7, i32 0)
+; CHECK-NEXT:    br label [[TMP39]]
+; CHECK:       39:
+; CHECK-NEXT:    [[DOT152:%.*]] = phi i32 [ [[DOT05160]], [[DOT_CRIT_EDGE]] ], [ [[TMP36]], [[TMP38]] ], [ [[TMP36]], [[TMP35]] ]
+; CHECK-NEXT:    [[DOT150:%.*]] = phi i32 [ [[DOT04961]], [[DOT_CRIT_EDGE]] ], [ 0, [[TMP38]] ], [ 0, [[TMP35]] ]
+; CHECK-NEXT:    [[TMP40:%.*]] = add i64 [[DOT046_LCSSA]], 4294967295
+; CHECK-NEXT:    [[TMP41:%.*]] = trunc i64 [[TMP40]] to i32
+; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 2
+; CHECK-NEXT:    store i32 [[TMP41]], ptr [[TMP42]], align 8, !tbaa [[TBAA11:![0-9]+]]
+; CHECK-NEXT:    [[TMP43:%.*]] = zext i32 [[DOT150]] to i64
+; CHECK-NEXT:    [[DOT046_:%.*]] = shl i64 [[DOT046_LCSSA]], [[TMP43]]
+; CHECK-NEXT:    [[TMP44:%.*]] = zext i32 [[DOT152]] to i64
+; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [2 x i64], ptr @aqMainSign, i64 0, i64 [[TMP44]]
+; CHECK-NEXT:    [[TMP46:%.*]] = load i64, ptr [[TMP45]], align 8, !tbaa [[TBAA12:![0-9]+]]
+; CHECK-NEXT:    [[TMP47:%.*]] = or i64 [[TMP46]], [[DOT046_]]
+; CHECK-NEXT:    store i64 [[TMP47]], ptr [[TMP45]], align 8, !tbaa [[TBAA12]]
+; CHECK-NEXT:    [[TMP48:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[TBAA9]]
+; CHECK-NEXT:    [[TMP49:%.*]] = zext i32 [[TMP48]] to i64
+; CHECK-NEXT:    [[TMP50:%.*]] = shl i64 [[TMP49]], [[TMP43]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [2 x i64], ptr @aqMainMask, i64 0, i64 [[TMP44]]
+; CHECK-NEXT:    [[TMP52:%.*]] = load i64, ptr [[TMP51]], align 8, !tbaa [[TBAA12]]
+; CHECK-NEXT:    [[TMP53:%.*]] = or i64 [[TMP50]], [[TMP52]]
+; CHECK-NEXT:    store i64 [[TMP53]], ptr [[TMP51]], align 8, !tbaa [[TBAA12]]
+; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 1
+; CHECK-NEXT:    store i32 [[DOT150]], ptr [[TMP54]], align 4, !tbaa [[TBAA14:![0-9]+]]
+; CHECK-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 3
+; CHECK-NEXT:    store i32 [[DOT152]], ptr [[TMP55]], align 4, !tbaa [[TBAA15:![0-9]+]]
+; CHECK-NEXT:    [[TMP56:%.*]] = add nsw i32 [[DOT150]], [[DOT048_LCSSA]]
+; CHECK-NEXT:    br label [[TMP57]]
+; CHECK:       57:
+; CHECK-NEXT:    [[DOT253]] = phi i32 [ [[DOT05160]], [[TMP27]] ], [ [[DOT152]], [[TMP39]] ]
+; CHECK-NEXT:    [[DOT2]] = phi i32 [ [[DOT04961]], [[TMP27]] ], [ [[TMP56]], [[TMP39]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 26
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOTPREHEADER]], label [[TMP58:%.*]]
+; CHECK:       58:
+; CHECK-NEXT:    ret void
+;
   tail call void @llvm.memset.p0.i64(ptr align 16 @alPhrase, i8 0, i64 416, i1 false)
   tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainMask, i8 0, i64 16, i1 false)
   tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainSign, i8 0, i64 16, i1 false)
@@ -113,7 +229,6 @@ define void @BuildMask(ptr nocapture readonly) local_unnamed_addr #0 {
 ; If we screw up the revisitation of the users of store of %sink above
 ; we will end up propagating and simplifying this to 1 in the final output
 ; because we keep an optimistic assumption we should not.
-; CHECK:  add i32 %.05160, 1
   %37 = add i32 %.05160, 1
   %38 = icmp ugt i32 %37, 1
   br i1 %38, label %39, label %40
@@ -193,3 +308,20 @@ attributes #5 = { nounwind readonly }
 !14 = !{!"long", !3, i64 0}
 !15 = !{!11, !2, i64 4}
 !16 = !{!11, !2, i64 12}
+;.
+; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"any pointer", [[META3]], i64 0}
+; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[META8]] = !{!"short", [[META3]], i64 0}
+; CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META10]] = !{!"", [[META2]], i64 0, [[META2]], i64 4, [[META2]], i64 8, [[META2]], i64 12}
+; CHECK: [[TBAA11]] = !{[[META10]], [[META2]], i64 8}
+; CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; CHECK: [[META13]] = !{!"long", [[META3]], i64 0}
+; CHECK: [[TBAA14]] = !{[[META10]], [[META2]], i64 4}
+; CHECK: [[TBAA15]] = !{[[META10]], [[META2]], i64 12}
+;.
diff --git a/llvm/test/Transforms/NewGVN/metadata-nonnull.ll b/llvm/test/Transforms/NewGVN/metadata-nonnull.ll
index cd0922f..5de4c58 100644
--- a/llvm/test/Transforms/NewGVN/metadata-nonnull.ll
+++ b/llvm/test/Transforms/NewGVN/metadata-nonnull.ll
@@ -150,7 +150,7 @@ define ptr @test7(ptr %v0) {
 ; CHECK-LABEL: define ptr @test7
 ; CHECK-SAME: (ptr [[V0:%.*]]) {
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[V0]], align 8, !nonnull !0
+; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[V0]], align 8, !nonnull [[META0:![0-9]+]]
 ; CHECK-NEXT:    call void @use2(ptr [[V1]])
 ; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
diff --git a/llvm/test/Transforms/NewGVN/metadata-simplify.ll b/llvm/test/Transforms/NewGVN/metadata-simplify.ll
index a84c581..e981e37 100644
--- a/llvm/test/Transforms/NewGVN/metadata-simplify.ll
+++ b/llvm/test/Transforms/NewGVN/metadata-simplify.ll
@@ -9,11 +9,11 @@ define i1 @test1(ptr %arg, i1 %arg2) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LOAD1:%.*]] = load ptr, ptr [[ARG:%.*]], !nonnull !0
+; CHECK-NEXT:    [[LOAD1:%.*]] = load ptr, ptr [[ARG:%.*]], align 8, !nonnull [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[LOAD1]], null
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[LOAD2:%.*]] = load ptr, ptr [[ARG]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load ptr, ptr [[ARG]], align 8
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[LOAD2]], null
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -34,11 +34,11 @@ define i1 @test2(ptr %arg, i1 %arg2) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LOAD1:%.*]] = load ptr, ptr [[ARG:%.*]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[LOAD1]], null
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[LOAD2:%.*]] = load ptr, ptr [[ARG]], !nonnull !0
+; CHECK-NEXT:    [[LOAD2:%.*]] = load ptr, ptr [[ARG]], align 8, !nonnull [[META0]]
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[LOAD2]], null
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -60,11 +60,11 @@ define i1 @test3(ptr %ptr, i1 %arg2) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]], !range !1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !range [[RNG1:![0-9]+]]
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[LOAD1]], 999
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[LOAD2]], 999
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -85,11 +85,11 @@ define i1 @test4(ptr %ptr, i1 %arg2) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[LOAD1]], 999
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]], !range !1
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]], align 4, !range [[RNG1]]
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[LOAD2]], 999
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -110,11 +110,11 @@ define i1 @test5(ptr %ptr, i1 %arg2) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]], !range !1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !range [[RNG1]]
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 999
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[LOAD2]], 999
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -135,11 +135,11 @@ define i1 @test6(ptr %ptr, i1 %arg2) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 999
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]], !range !1
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[PTR]], align 4, !range [[RNG1]]
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[LOAD2]], 999
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
diff --git a/llvm/test/Transforms/NewGVN/noalias.ll b/llvm/test/Transforms/NewGVN/noalias.ll
index 5f0c5dd..2cb5c19 100644
--- a/llvm/test/Transforms/NewGVN/noalias.ll
+++ b/llvm/test/Transforms/NewGVN/noalias.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 define i32 @test1(ptr %p, ptr %q) {
-; CHECK-LABEL: @test1(ptr %p, ptr %q)
-; CHECK: load i32, ptr %p
-; CHECK-NOT: noalias
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test1(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = load i32, ptr %p, !noalias !3
   %b = load i32, ptr %p
   %c = add i32 %a, %b
@@ -12,9 +15,12 @@ define i32 @test1(ptr %p, ptr %q) {
 }
 
 define i32 @test2(ptr %p, ptr %q) {
-; CHECK-LABEL: @test2(ptr %p, ptr %q)
-; CHECK: load i32, ptr %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test2(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = load i32, ptr %p, !alias.scope !3
   %b = load i32, ptr %p, !alias.scope !3
   %c = add i32 %a, %b
@@ -22,17 +28,18 @@ define i32 @test2(ptr %p, ptr %q) {
 }
 
 define i32 @test3(ptr %p, ptr %q) {
-; CHECK-LABEL: @test3(ptr %p, ptr %q)
-; CHECK: load i32, ptr %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test3(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = load i32, ptr %p, !alias.scope !4
   %b = load i32, ptr %p, !alias.scope !5
   %c = add i32 %a, %b
   ret i32 %c
 }
 
-; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
-; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
 declare i32 @foo(ptr) readonly
 
 !0 = distinct !{!0, !2, !"callee0: %a"}
@@ -42,3 +49,10 @@ declare i32 @foo(ptr) readonly
 !3 = !{!0}
 !4 = !{!1}
 !5 = !{!0, !1}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee0: %a"}
+; CHECK: [[META2]] = distinct !{[[META2]], !"callee0"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]], [[META1]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"callee0: %b"}
+;.
diff --git a/llvm/test/Transforms/NewGVN/nomemlocation.ll b/llvm/test/Transforms/NewGVN/nomemlocation.ll
index 0f716b3..332e6c6 100644
--- a/llvm/test/Transforms/NewGVN/nomemlocation.ll
+++ b/llvm/test/Transforms/NewGVN/nomemlocation.ll
@@ -1,8 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -S -p='newgvn' | FileCheck %s
 ; MemorySSA should be able to handle a clobber query with an empty MemoryLocation.
 
-; CHECK: @userread
 define ptr @userread(ptr %p) {
+; CHECK-LABEL: define ptr @userread(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[POS:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[DIFF:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[POS]]
+; CHECK-NEXT:    [[LD:%.*]] = load ptr, ptr [[GEP]], align 8
+; CHECK-NEXT:    [[READVAL:%.*]] = call i64 @fread(ptr noundef nonnull [[GEP]], i64 noundef 1, i64 noundef [[POS]], ptr noundef [[LD]])
+; CHECK-NEXT:    [[READVALISPOS:%.*]] = icmp eq i64 [[READVAL]], [[POS]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[READVALISPOS]])
+; CHECK-NEXT:    [[DIFF]] = sub i64 0, [[POS]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
 entry:
   br label %loop
 
diff --git a/llvm/test/Transforms/NewGVN/non-integral-pointers.ll b/llvm/test/Transforms/NewGVN/non-integral-pointers.ll
index 6119577..1e3da6e 100644
--- a/llvm/test/Transforms/NewGVN/non-integral-pointers.ll
+++ b/llvm/test/Transforms/NewGVN/non-integral-pointers.ll
@@ -1,37 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
 target triple = "x86_64-unknown-linux-gnu"
 
 define void @f0(i1 %alwaysFalse, i64 %val, ptr %loc) {
-; CHECK-LABEL: @f0(
-; CHECK-NOT: inttoptr
-; CHECK-NOT: ptrtoint
- entry:
+; CHECK-LABEL: define void @f0(
+; CHECK-SAME: i1 [[ALWAYSFALSE:%.*]], i64 [[VAL:%.*]], ptr [[LOC:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[LOC]], align 8
+; CHECK-NEXT:    br i1 [[ALWAYSFALSE]], label [[NEVERTAKEN:%.*]], label [[ALWAYSTAKEN:%.*]]
+; CHECK:       neverTaken:
+; CHECK-NEXT:    [[PTR:%.*]] = load ptr addrspace(4), ptr [[LOC]], align 8
+; CHECK-NEXT:    store i8 5, ptr addrspace(4) [[PTR]], align 1
+; CHECK-NEXT:    ret void
+; CHECK:       alwaysTaken:
+; CHECK-NEXT:    ret void
+;
+  entry:
   store i64 %val, ptr %loc
   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
 
- neverTaken:
+  neverTaken:
   %ptr = load ptr addrspace(4), ptr %loc
   store i8 5, ptr addrspace(4) %ptr
   ret void
 
- alwaysTaken:
+  alwaysTaken:
   ret void
 }
 
 define i64 @f1(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) {
-; CHECK-LABEL: @f1(
-; CHECK-NOT: inttoptr
-; CHECK-NOT: ptrtoint
- entry:
+; CHECK-LABEL: define i64 @f1(
+; CHECK-SAME: i1 [[ALWAYSFALSE:%.*]], ptr addrspace(4) [[VAL:%.*]], ptr [[LOC:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store ptr addrspace(4) [[VAL]], ptr [[LOC]], align 8
+; CHECK-NEXT:    br i1 [[ALWAYSFALSE]], label [[NEVERTAKEN:%.*]], label [[ALWAYSTAKEN:%.*]]
+; CHECK:       neverTaken:
+; CHECK-NEXT:    [[INT:%.*]] = load i64, ptr [[LOC]], align 8
+; CHECK-NEXT:    ret i64 [[INT]]
+; CHECK:       alwaysTaken:
+; CHECK-NEXT:    ret i64 42
+;
+  entry:
   store ptr addrspace(4) %val, ptr %loc
   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
 
- neverTaken:
+  neverTaken:
   %int = load i64, ptr %loc
   ret i64 %int
 
- alwaysTaken:
+  alwaysTaken:
   ret i64 42
 }
diff --git a/llvm/test/Transforms/NewGVN/null-aliases-nothing.ll b/llvm/test/Transforms/NewGVN/null-aliases-nothing.ll
index 6666211..25295fa 100644
--- a/llvm/test/Transforms/NewGVN/null-aliases-nothing.ll
+++ b/llvm/test/Transforms/NewGVN/null-aliases-nothing.ll
@@ -1,19 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 %t = type { i32 }
 declare void @test1f(ptr)
 
 define void @test1(ptr noalias %stuff ) {
-    %before = load i32, ptr %stuff
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr noalias [[STUFF:%.*]]) {
+; CHECK-NEXT:    [[BEFORE:%.*]] = load i32, ptr [[STUFF]], align 4
+; CHECK-NEXT:    call void @test1f(ptr null)
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[BEFORE]], [[BEFORE]]
+; CHECK-NEXT:    store i32 [[SUM]], ptr [[STUFF]], align 4
+; CHECK-NEXT:    ret void
+;
+  %before = load i32, ptr %stuff
 
-    call void @test1f(ptr null)
+  call void @test1f(ptr null)
 
-    %after = load i32, ptr %stuff ; <--- This should be a dead load
-    %sum = add i32 %before, %after
+  %after = load i32, ptr %stuff ; <--- This should be a dead load
+  %sum = add i32 %before, %after
 
-    store i32 %sum, ptr %stuff
-    ret void
-; CHECK: load
-; CHECK-NOT: load
-; CHECK: ret void
+  store i32 %sum, ptr %stuff
+  ret void
 }
diff --git a/llvm/test/Transforms/NewGVN/phi-edge-handling.ll b/llvm/test/Transforms/NewGVN/phi-edge-handling.ll
index 8dfac79..0e871b6 100644
--- a/llvm/test/Transforms/NewGVN/phi-edge-handling.ll
+++ b/llvm/test/Transforms/NewGVN/phi-edge-handling.ll
@@ -9,8 +9,8 @@ define i16 @hoge() {
 ; CHECK-LABEL: @hoge(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    switch i8 undef, label [[BB7:%.*]] [
-; CHECK-NEXT:    i8 0, label [[BB1:%.*]]
-; CHECK-NEXT:    i8 12, label [[BB2:%.*]]
+; CHECK-NEXT:      i8 0, label [[BB1:%.*]]
+; CHECK-NEXT:      i8 12, label [[BB2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB6:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll b/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll
index ac07148..573a4c0 100644
--- a/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll
+++ b/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll
@@ -88,8 +88,8 @@ define void @pr42422(i1 %c.1, i1 %c.2) {
 ; CHECK:       bb16:
 ; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[BB15]] ], [ 1, [[BB14]] ], [ 9, [[BB7]] ]
 ; CHECK-NEXT:    switch i32 [[TMP17]], label [[BB19]] [
-; CHECK-NEXT:    i32 0, label [[BB6]]
-; CHECK-NEXT:    i32 9, label [[BB18:%.*]]
+; CHECK-NEXT:      i32 0, label [[BB6]]
+; CHECK-NEXT:      i32 9, label [[BB18:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bb18:
 ; CHECK-NEXT:    br label [[BB19]]
diff --git a/llvm/test/Transforms/NewGVN/phi-translate-partial-alias.ll b/llvm/test/Transforms/NewGVN/phi-translate-partial-alias.ll
index cb24b05..7dd4190 100644
--- a/llvm/test/Transforms/NewGVN/phi-translate-partial-alias.ll
+++ b/llvm/test/Transforms/NewGVN/phi-translate-partial-alias.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
@@ -6,12 +7,19 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; not actually redundant around the loop backedge, despite appearances
 ; if phi-translation is ignored.
 
-; CHECK: define void @test0(ptr %begin)
-; CHECK: loop:
-; CHECK:   %l0 = load i8, ptr %phi
-; CHECK:   call void @bar(i8 %l0)
-; CHECK:   %l1 = load i8, ptr %phi
 define void @test0(ptr %begin) {
+; CHECK-LABEL: define void @test0(
+; CHECK-SAME: ptr [[BEGIN:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[BEGIN]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L0:%.*]] = load i8, ptr [[PHI]], align 1
+; CHECK-NEXT:    call void @bar(i8 [[L0]])
+; CHECK-NEXT:    [[L1:%.*]] = load i8, ptr [[PHI]], align 1
+; CHECK-NEXT:    [[NEXT]] = getelementptr inbounds i8, ptr [[PHI]], i8 [[L1]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
 entry:
   br label %loop
 
diff --git a/llvm/test/Transforms/NewGVN/pr17732.ll b/llvm/test/Transforms/NewGVN/pr17732.ll
index 427543d..e66547c 100644
--- a/llvm/test/Transforms/NewGVN/pr17732.ll
+++ b/llvm/test/Transforms/NewGVN/pr17732.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S -o - < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,6 +13,12 @@ target triple = "x86_64-unknown-linux-gnu"
 @vector_with_zeroinit = common global %struct.with_vector zeroinitializer, align 4
 
 define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 @array_with_zeroinit, ptr align 4 @main.obj_with_array, i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 @vector_with_zeroinit, ptr align 4 @main.obj_with_vector, i64 12, i1 false)
+; CHECK-NEXT:    ret i32 1
+;
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 @array_with_zeroinit, ptr align 4 @main.obj_with_array, i64 12, i1 false)
   %0 = load i8, ptr getelementptr inbounds (%struct.with_array, ptr @array_with_zeroinit, i64 0, i32 2), align 4
@@ -22,8 +29,6 @@ entry:
   %conv1 = sext i8 %1 to i32
   %and = and i32 %conv0, %conv1
   ret i32 %and
-; CHECK-LABEL: define i32 @main(
-; CHECK: ret i32 1
 }
 
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
diff --git a/llvm/test/Transforms/NewGVN/pr17852.ll b/llvm/test/Transforms/NewGVN/pr17852.ll
index 5858982..bffde12 100644
--- a/llvm/test/Transforms/NewGVN/pr17852.ll
+++ b/llvm/test/Transforms/NewGVN/pr17852.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=newgvn
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 %struct.S0 = type { [2 x i8], [2 x i8], [4 x i8], [2 x i8], i32, i32, i32, i32 }
diff --git a/llvm/test/Transforms/NewGVN/pr24397.ll b/llvm/test/Transforms/NewGVN/pr24397.ll
index f3f112a..d998144 100644
--- a/llvm/test/Transforms/NewGVN/pr24397.ll
+++ b/llvm/test/Transforms/NewGVN/pr24397.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -disable-output < %s
 
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/NewGVN/pr24426.ll b/llvm/test/Transforms/NewGVN/pr24426.ll
index e8a88f5..8f0974a 100644
--- a/llvm/test/Transforms/NewGVN/pr24426.ll
+++ b/llvm/test/Transforms/NewGVN/pr24426.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=memcpyopt,mldst-motion,newgvn -S | FileCheck %s
 
 declare void @check(i8)
@@ -5,11 +6,17 @@ declare void @check(i8)
 declare void @write(ptr %res)
 
 define void @test1() {
+; CHECK-LABEL: define void @test1() {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [10 x i8], align 1
+; CHECK-NEXT:    call void @write(ptr [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+; CHECK-NEXT:    call void @check(i8 [[TMP2]])
+; CHECK-NEXT:    ret void
+;
   %1 = alloca [10 x i8]
   call void @write(ptr %1)
   %2 = load i8, ptr %1
 
-; CHECK-NOT: undef
   call void @check(i8 %2)
 
   ret void
diff --git a/llvm/test/Transforms/NewGVN/pr25440.ll b/llvm/test/Transforms/NewGVN/pr25440.ll
index b3ebf44..9d9c4cd 100644
--- a/llvm/test/Transforms/NewGVN/pr25440.ll
+++ b/llvm/test/Transforms/NewGVN/pr25440.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ;RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
@@ -10,19 +11,51 @@ target triple = "thumbv7--linux-gnueabi"
 
 ; Function Attrs: nounwind
 define fastcc void @foo(ptr nocapture readonly %x) {
-;CHECK-LABEL: foo
+; CHECK-LABEL: define fastcc void @foo(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[X_TR:%.*]] = phi ptr [ [[X]], [[ENTRY:%.*]] ], [ null, [[LAND_LHS_TRUE:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[X_TR]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    switch i32 [[CONV]], label [[IF_END_50:%.*]] [
+; CHECK-NEXT:      i32 43, label [[CLEANUP:%.*]]
+; CHECK-NEXT:      i32 52, label [[IF_THEN_5:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then.5:
+; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE]], label [[IF_THEN_26:%.*]]
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    br i1 undef, label [[CLEANUP]], label [[BB0]]
+; CHECK:       if.then.26:
+; CHECK-NEXT:    br i1 undef, label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[MODE:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[X_TR]], i32 0, i32 1
+; CHECK-NEXT:    [[BF_LOAD:%.*]] = load i16, ptr [[MODE]], align 2
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN_44:%.*]], label [[CLEANUP]]
+; CHECK:       if.then.44:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end.50:
+; CHECK-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds [0 x i32], ptr @length, i32 0, i32 [[CONV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX52]], align 4
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY_57:%.*]], label [[CLEANUP]]
+; CHECK:       for.body.57:
+; CHECK-NEXT:    unreachable
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %bb0
 
 bb0:                                      ; preds = %land.lhs.true, %entry
-;CHECK: bb0:
   %x.tr = phi ptr [ %x, %entry ], [ null, %land.lhs.true ]
   %0 = load i16, ptr %x.tr, align 4
-; CHECK: load i16, ptr
   %conv = zext i16 %0 to i32
   switch i32 %conv, label %if.end.50 [
-    i32 43, label %cleanup
-    i32 52, label %if.then.5
+  i32 43, label %cleanup
+  i32 52, label %if.then.5
   ]
 
 if.then.5:                                        ; preds = %bb0
@@ -36,8 +69,6 @@ if.then.26:                                       ; preds = %if.then.5
   br i1 undef, label %cond.end, label %cond.false
 
 cond.false:                                       ; preds = %if.then.26
-; CHECK: cond.false:
-; CHECK: load i16
   %mode = getelementptr inbounds %struct.a, ptr %x.tr.lcssa163, i32 0, i32 1
   %bf.load = load i16, ptr %mode, align 2
   %bf.shl = shl i16 %bf.load, 8
@@ -50,7 +81,6 @@ if.then.44:                                       ; preds = %cond.end
   unreachable
 
 if.end.50:                                        ; preds = %bb0
-;%CHECK: if.end.50:
   %conv.lcssa = phi i32 [ %conv, %bb0 ]
   %arrayidx52 = getelementptr inbounds [0 x i32], ptr @length, i32 0, i32 %conv.lcssa
   %1 = load i32, ptr %arrayidx52, align 4
@@ -68,7 +98,34 @@ cleanup:                                          ; preds = %if.end.50, %cond.en
 @dfg_text = external global ptr, align 4
 
 define void @dfg_lex() {
-;CHECK-LABEL: dfg_lex
+; CHECK-LABEL: define void @dfg_lex() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[WHILE_BODYTHREAD_PRE_SPLIT:%.*]]
+; CHECK:       while.bodythread-pre-split:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN_14:%.*]], label [[IF_END_15:%.*]]
+; CHECK:       if.then.14:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr @dfg_text, align 4
+; CHECK-NEXT:    br label [[IF_END_15]]
+; CHECK:       if.end.15:
+; CHECK-NEXT:    [[V2:%.*]] = load ptr, ptr @yy_c_buf_p, align 4
+; CHECK-NEXT:    br label [[WHILE_COND_16:%.*]]
+; CHECK:       while.cond.16:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND_16]], label [[WHILE_END:%.*]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[V2]], i32 undef
+; CHECK-NEXT:    store ptr [[ADD_PTR]], ptr @dfg_text, align 4
+; CHECK-NEXT:    [[SUB_PTR_RHS_CAST25:%.*]] = ptrtoint ptr [[ADD_PTR]] to i32
+; CHECK-NEXT:    switch i32 undef, label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i32 65, label [[WHILE_BODYTHREAD_PRE_SPLIT]]
+; CHECK-NEXT:      i32 3, label [[RETURN:%.*]]
+; CHECK-NEXT:      i32 57, label [[WHILE_BODYTHREAD_PRE_SPLIT]]
+; CHECK-NEXT:      i32 60, label [[IF_THEN_14]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.default:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %while.bodythread-pre-split
 
@@ -93,10 +150,10 @@ while.end:                                        ; preds = %while.cond.16
   %sub.ptr.rhs.cast25 = ptrtoint ptr %add.ptr to i32
   %sub.ptr.sub26 = sub i32 0, %sub.ptr.rhs.cast25
   switch i32 undef, label %sw.default [
-    i32 65, label %while.bodythread-pre-split
-    i32 3, label %return
-    i32 57, label %while.bodythread-pre-split
-    i32 60, label %if.then.14
+  i32 65, label %while.bodythread-pre-split
+  i32 3, label %return
+  i32 57, label %while.bodythread-pre-split
+  i32 60, label %if.then.14
   ]
 
 sw.default:                                       ; preds = %while.end
diff --git a/llvm/test/Transforms/NewGVN/pr28562.ll b/llvm/test/Transforms/NewGVN/pr28562.ll
index a62fdd3..320224c 100644
--- a/llvm/test/Transforms/NewGVN/pr28562.ll
+++ b/llvm/test/Transforms/NewGVN/pr28562.ll
@@ -1,9 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S -passes=newgvn < %s | FileCheck %s
 define ptr @test1(ptr %a) {
+; CHECK-LABEL: define ptr @test1(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[X2:%.*]] = getelementptr i32, ptr [[A]], i32 10
+; CHECK-NEXT:    ret ptr [[X2]]
+;
   %x1 = getelementptr inbounds i32, ptr %a, i32 10
   %x2 = getelementptr i32, ptr %a, i32 10
   ret ptr %x2
-; CHECK-LABEL: @test1(
-; CHECK: %[[x:.*]] = getelementptr i32, ptr %a, i32 10
-; CHECK: ret ptr %[[x]]
 }
diff --git a/llvm/test/Transforms/NewGVN/pr31472.ll b/llvm/test/Transforms/NewGVN/pr31472.ll
index 8bb9a14..8eeb614 100644
--- a/llvm/test/Transforms/NewGVN/pr31472.ll
+++ b/llvm/test/Transforms/NewGVN/pr31472.ll
@@ -9,11 +9,13 @@ target triple = "x86_64-apple-macosx10.12.0"
 define i32 @main() personality ptr @__gxx_personality_v0{
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:    [[TMP1:%.*]] = invoke i32 @foo()
-; CHECK-NEXT:    to label %good unwind label %bad
+; CHECK-NEXT:            to label [[GOOD:%.*]] unwind label [[BAD:%.*]]
 ; CHECK:       good:
 ; CHECK-NEXT:    ret i32 5
 ; CHECK:       bad:
-; CHECK-NEXT:    [[TMP2:%.*]] = landingpad { ptr, i32
+; CHECK-NEXT:    [[TMP2:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    resume { ptr, i32 } [[TMP2]]
 ;
   %1 = invoke i32 @foo()
   to label %good unwind label %bad
diff --git a/llvm/test/Transforms/NewGVN/pr31483.ll b/llvm/test/Transforms/NewGVN/pr31483.ll
index fe957de..0e7461c 100644
--- a/llvm/test/Transforms/NewGVN/pr31483.ll
+++ b/llvm/test/Transforms/NewGVN/pr31483.ll
@@ -10,20 +10,20 @@ define signext i32 @ham(ptr %arg, ptr %arg1) #0 {
 ; CHECK-LABEL: @ham(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    store ptr %arg1, ptr [[TMP]], align 8
-; CHECK-NEXT:    br label %bb2
+; CHECK-NEXT:    store ptr [[ARG1:%.*]], ptr [[TMP]], align 8
+; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi ptr [ %arg, %bb ], [ %tmp7, %bb22 ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi ptr [ [[ARG:%.*]], [[BB:%.*]] ], [ [[TMP7:%.*]], [[BB22:%.*]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %bb6, label %bb23
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]]
 ; CHECK:       bb6:
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i8, ptr [[TMP3]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i8 [[TMP4]] to i32
-; CHECK-NEXT:    switch i32 [[TMP9]], label %bb22 [
-; CHECK-NEXT:    i32 115, label %bb10
-; CHECK-NEXT:    i32 105, label %bb16
-; CHECK-NEXT:    i32 99, label %bb16
+; CHECK-NEXT:    switch i32 [[TMP9]], label [[BB22]] [
+; CHECK-NEXT:      i32 115, label [[BB10:%.*]]
+; CHECK-NEXT:      i32 105, label [[BB16:%.*]]
+; CHECK-NEXT:      i32 99, label [[BB16]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8
@@ -31,15 +31,15 @@ define signext i32 @ham(ptr %arg, ptr %arg1) #0 {
 ; CHECK-NEXT:    store ptr [[TMP12]], ptr [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP11]], align 8
 ; CHECK-NEXT:    [[TMP15:%.*]] = call signext i32 (ptr, ...) @zot(ptr @global, ptr [[TMP14]])
-; CHECK-NEXT:    br label %bb22
+; CHECK-NEXT:    br label [[BB22]]
 ; CHECK:       bb16:
 ; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 8
 ; CHECK-NEXT:    store ptr [[TMP18]], ptr [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 4
-; CHECK-NEXT:    br label %bb22
+; CHECK-NEXT:    br label [[BB22]]
 ; CHECK:       bb22:
-; CHECK-NEXT:    br label %bb2
+; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb23:
 ; CHECK-NEXT:    call void @llvm.va_end(ptr [[TMP]])
 ; CHECK-NEXT:    ret i32 undef
diff --git a/llvm/test/Transforms/NewGVN/pr31491.ll b/llvm/test/Transforms/NewGVN/pr31491.ll
index 5f6b371..f27f13e 100644
--- a/llvm/test/Transforms/NewGVN/pr31491.ll
+++ b/llvm/test/Transforms/NewGVN/pr31491.ll
@@ -7,13 +7,13 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define internal i32 @pr31491() {
 ; CHECK-LABEL: @pr31491(
 ; CHECK-NEXT:  bb5:
-; CHECK-NEXT:    br label %bb7
+; CHECK-NEXT:    br label [[BB7:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[TMP:%.*]] = phi ptr [ [[TMP:%.*]]11, %bb10 ], [ undef, %bb5 ]
-; CHECK-NEXT:    br label %bb10
+; CHECK-NEXT:    [[TMP:%.*]] = phi ptr [ [[TMP11:%.*]], [[BB10:%.*]] ], [ undef, [[BB5:%.*]] ]
+; CHECK-NEXT:    br label [[BB10]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP11:%.*]] = tail call ptr @patatino(ptr [[TMP]])
-; CHECK-NEXT:    br label %bb7
+; CHECK-NEXT:    [[TMP11]] = tail call ptr @patatino(ptr [[TMP]])
+; CHECK-NEXT:    br label [[BB7]]
 ;
 bb5:
   br label %bb7
diff --git a/llvm/test/Transforms/NewGVN/pr31501.ll b/llvm/test/Transforms/NewGVN/pr31501.ll
index 55195fd..18bfcd1 100644
--- a/llvm/test/Transforms/NewGVN/pr31501.ll
+++ b/llvm/test/Transforms/NewGVN/pr31501.ll
@@ -52,30 +52,30 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define weak_odr hidden ptr @quux(ptr %arg, ptr %arg1) local_unnamed_addr #0 align 2 {
 ; CHECK-LABEL: @quux(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds %struct.barney, ptr %arg, i64 0, i32 3, i32 0, i32 0, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !tbaa !2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds %struct.barney, ptr %arg, i64 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa !7
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[STRUCT_BARNEY:%.*]], ptr [[ARG:%.*]], i64 0, i32 3, i32 0, i32 0, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !tbaa [[TBAA2:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_BARNEY]], ptr [[ARG]], i64 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA7:![0-9]+]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq ptr [[TMP3]], [[TMP6]]
-; CHECK-NEXT:    br i1 [[TMP7]], label %bb21, label %bb8
+; CHECK-NEXT:    br i1 [[TMP7]], label [[BB21:%.*]], label [[BB8:%.*]]
 ; CHECK:       bb8:
-; CHECK-NEXT:    br label %bb11
+; CHECK-NEXT:    br label [[BB11:%.*]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq ptr [[TMP18:%.*]], [[TMP6]]
-; CHECK-NEXT:    br i1 [[TMP10]], label %bb19, label %bb11
+; CHECK-NEXT:    br i1 [[TMP10]], label [[BB19:%.*]], label [[BB11]]
 ; CHECK:       bb11:
-; CHECK-NEXT:    [[TMP12:%.*]] = phi ptr [ [[TMP17:%.*]], %bb9 ], [ undef, %bb8 ]
-; CHECK-NEXT:    [[TMP13:%.*]] = phi ptr [ [[TMP18]], %bb9 ], [ [[TMP3]], %bb8 ]
-; CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8, !tbaa !8
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq ptr [[TMP15]], %arg1
+; CHECK-NEXT:    [[TMP12:%.*]] = phi ptr [ [[TMP17:%.*]], [[BB9:%.*]] ], [ undef, [[BB8]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi ptr [ [[TMP18]], [[BB9]] ], [ [[TMP3]], [[BB8]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq ptr [[TMP15]], [[ARG1:%.*]]
 ; CHECK-NEXT:    [[TMP17]] = select i1 [[TMP16]], ptr [[TMP13]], ptr [[TMP12]]
-; CHECK-NEXT:    [[TMP18]] = getelementptr inbounds %struct.foo, ptr [[TMP13]], i64 1
-; CHECK-NEXT:    br i1 [[TMP16]], label %bb19, label %bb9
+; CHECK-NEXT:    [[TMP18]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP13]], i64 1
+; CHECK-NEXT:    br i1 [[TMP16]], label [[BB19]], label [[BB9]]
 ; CHECK:       bb19:
-; CHECK-NEXT:    [[TMP20:%.*]] = phi ptr [ null, %bb9 ], [ [[TMP17]], %bb11 ]
-; CHECK-NEXT:    br label %bb21
+; CHECK-NEXT:    [[TMP20:%.*]] = phi ptr [ null, [[BB9]] ], [ [[TMP17]], [[BB11]] ]
+; CHECK-NEXT:    br label [[BB21]]
 ; CHECK:       bb21:
-; CHECK-NEXT:    [[TMP22:%.*]] = phi ptr [ null, %bb ], [ [[TMP20]], %bb19 ]
+; CHECK-NEXT:    [[TMP22:%.*]] = phi ptr [ null, [[BB:%.*]] ], [ [[TMP20]], [[BB19]] ]
 ; CHECK-NEXT:    ret ptr [[TMP22]]
 ;
 bb:
diff --git a/llvm/test/Transforms/NewGVN/pr31573.ll b/llvm/test/Transforms/NewGVN/pr31573.ll
index 2382c487..7835e9d 100644
--- a/llvm/test/Transforms/NewGVN/pr31573.ll
+++ b/llvm/test/Transforms/NewGVN/pr31573.ll
@@ -10,7 +10,7 @@ define void @patatino(ptr %blah) {
 ; CHECK:       while.cond:
 ; CHECK-NEXT:    [[MEH:%.*]] = phi ptr [ [[BLAH:%.*]], [[ENTRY:%.*]] ], [ null, [[WHILE_BODY:%.*]] ]
 ; CHECK-NEXT:    switch i32 undef, label [[WHILE_BODY]] [
-; CHECK-NEXT:    i32 666, label [[WHILE_END:%.*]]
+; CHECK-NEXT:      i32 666, label [[WHILE_END:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       while.body:
 ; CHECK-NEXT:    br label [[WHILE_COND]]
diff --git a/llvm/test/Transforms/NewGVN/pr31594.ll b/llvm/test/Transforms/NewGVN/pr31594.ll
index 47294d5..d1a02d6 100644
--- a/llvm/test/Transforms/NewGVN/pr31594.ll
+++ b/llvm/test/Transforms/NewGVN/pr31594.ll
@@ -10,8 +10,8 @@ define i1 @patatino(ptr %blah, i32 %choice) {
 ; CHECK:       while.cond:
 ; CHECK-NEXT:    [[FOO:%.*]] = phi ptr [ [[BLAH:%.*]], [[ENTRY:%.*]] ], [ null, [[WHILE_BODY:%.*]] ]
 ; CHECK-NEXT:    switch i32 [[CHOICE:%.*]], label [[WHILE_BODY]] [
-; CHECK-NEXT:    i32 -1, label [[WHILE_END:%.*]]
-; CHECK-NEXT:    i32 40, label [[LAND_END:%.*]]
+; CHECK-NEXT:      i32 -1, label [[WHILE_END:%.*]]
+; CHECK-NEXT:      i32 40, label [[LAND_END:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       land.end:
 ; CHECK-NEXT:    br label [[WHILE_END]]
@@ -66,7 +66,7 @@ define void @foo(ptr %arg) {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i8 0, ptr [[TMP]], align 1, !g !0
+; CHECK-NEXT:    store i8 0, ptr [[TMP]], align 1, !g [[META0:![0-9]+]]
 ; CHECK-NEXT:    br label [[BB4:%.*]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    br label [[BB6:%.*]]
@@ -74,13 +74,13 @@ define void @foo(ptr %arg) {
 ; CHECK-NEXT:    br i1 undef, label [[BB9:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    switch i8 0, label [[BB6]] [
-; CHECK-NEXT:    i8 6, label [[BB8:%.*]]
+; CHECK-NEXT:      i8 6, label [[BB8:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bb8:
 ; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    br label [[BB4]]
 ; CHECK:       bb9:
-; CHECK-NEXT:    store i8 0, ptr [[ARG]], align 1, !g !0
+; CHECK-NEXT:    store i8 0, ptr [[ARG]], align 1, !g [[META0]]
 ; CHECK-NEXT:    unreachable
 ;
 bb:
diff --git a/llvm/test/Transforms/NewGVN/pr31613.ll b/llvm/test/Transforms/NewGVN/pr31613.ll
index 943cdbc..0bcf86a 100644
--- a/llvm/test/Transforms/NewGVN/pr31613.ll
+++ b/llvm/test/Transforms/NewGVN/pr31613.ll
@@ -74,7 +74,7 @@ declare void @c.d.p(i64, ptr)
 define void @e(i32 %a0, i32 %a1, ptr %p2) {
 ; CHECK-LABEL: @e(
 ; CHECK-NEXT:    [[F:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A0:%.*]], ptr [[F]], align 4, !g !0
+; CHECK-NEXT:    store i32 [[A0:%.*]], ptr [[F]], align 4, !g [[META0:![0-9]+]]
 ; CHECK-NEXT:    br label [[H:%.*]]
 ; CHECK:       h:
 ; CHECK-NEXT:    call void @c.d.p(i64 8, ptr undef)
@@ -88,10 +88,10 @@ define void @e(i32 %a0, i32 %a1, ptr %p2) {
 ; CHECK-NEXT:    br label [[R]]
 ; CHECK:       r:
 ; CHECK-NEXT:    switch i32 undef, label [[N:%.*]] [
-; CHECK-NEXT:    i32 0, label [[S:%.*]]
+; CHECK-NEXT:      i32 0, label [[S:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       s:
-; CHECK-NEXT:    store i32 [[A1:%.*]], ptr [[F]], align 4, !g !0
+; CHECK-NEXT:    store i32 [[A1:%.*]], ptr [[F]], align 4, !g [[META0]]
 ; CHECK-NEXT:    br label [[H]]
 ; CHECK:       n:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/NewGVN/pr31682.ll b/llvm/test/Transforms/NewGVN/pr31682.ll
index 00a1bf2..3d8c9e2 100644
--- a/llvm/test/Transforms/NewGVN/pr31682.ll
+++ b/llvm/test/Transforms/NewGVN/pr31682.ll
@@ -9,7 +9,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define void @bar() {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load ptr, ptr @global
+; CHECK-NEXT:    [[TMP:%.*]] = load ptr, ptr @global, align 8
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br i1 undef, label [[BB2]], label [[BB7:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/pr31758.ll b/llvm/test/Transforms/NewGVN/pr31758.ll
index 4318843..274d605 100644
--- a/llvm/test/Transforms/NewGVN/pr31758.ll
+++ b/llvm/test/Transforms/NewGVN/pr31758.ll
@@ -12,7 +12,7 @@ define void @tinkywinky() {
 ; CHECK:       bb90:
 ; CHECK-NEXT:    br label [[BB90]]
 ; CHECK:       bb138:
-; CHECK-NEXT:    store i8 poison, ptr null
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    br label [[BB138:%.*]]
 ;
 bb:
diff --git a/llvm/test/Transforms/NewGVN/pr32607.ll b/llvm/test/Transforms/NewGVN/pr32607.ll
index 4770724..7460ab5 100644
--- a/llvm/test/Transforms/NewGVN/pr32607.ll
+++ b/llvm/test/Transforms/NewGVN/pr32607.ll
@@ -7,7 +7,7 @@ define hidden void @foo() {
 ; CHECK:       if:
 ; CHECK-NEXT:    br i1 false, label [[L50:%.*]], label [[IF]]
 ; CHECK:       L50:
-; CHECK-NEXT:    store i8 poison, ptr null
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    ret void
 ;
 top:
diff --git a/llvm/test/Transforms/NewGVN/pr32836.ll b/llvm/test/Transforms/NewGVN/pr32836.ll
index 5488655..00f3fb0 100644
--- a/llvm/test/Transforms/NewGVN/pr32836.ll
+++ b/llvm/test/Transforms/NewGVN/pr32836.ll
@@ -5,19 +5,19 @@
 @b = external global %struct.anon
 define void @tinkywinky(i1 %patatino) {
 ; CHECK-LABEL: @tinkywinky(
-; CHECK-NEXT:    store i32 8, ptr null
+; CHECK-NEXT:    store i32 8, ptr null, align 4
 ; CHECK-NEXT:    br i1 [[PATATINO:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[L:%.*]]
 ; CHECK:       L:
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr null
-; CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i32, ptr @b
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i32, ptr @b, align 4
 ; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i32 [[TMP1]], 536870911
 ; CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -536870912
 ; CHECK-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
-; CHECK-NEXT:    store i32 [[BF_SET]], ptr @b
+; CHECK-NEXT:    store i32 [[BF_SET]], ptr @b, align 4
 ; CHECK-NEXT:    br label [[LOR_END:%.*]]
 ; CHECK:       lor.end:
 ; CHECK-NEXT:    br label [[L]]
diff --git a/llvm/test/Transforms/NewGVN/pr32838.ll b/llvm/test/Transforms/NewGVN/pr32838.ll
index 5ba68fa..87c93d9 100644
--- a/llvm/test/Transforms/NewGVN/pr32838.ll
+++ b/llvm/test/Transforms/NewGVN/pr32838.ll
@@ -10,7 +10,7 @@ define void @fn1(i64 noundef %arg) {
 ; CHECK:       if.then:
 ; CHECK-NEXT:    br i1 false, label [[FIRSTPHIBLOCK:%.*]], label [[TEMP:%.*]]
 ; CHECK:       firstphiblock:
-; CHECK-NEXT:    br i1 undef, label %for.cond17thread-pre-split, label [[SECONDPHIBLOCK:%.*]]
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND17THREAD_PRE_SPLIT:%.*]], label [[SECONDPHIBLOCK:%.*]]
 ; CHECK:       secondphiblock:
 ; CHECK-NEXT:    [[SECONDPHI:%.*]] = phi i64 [ [[THIRDPHI:%.*]], [[THIRDPHIBLOCK:%.*]] ], [ undef, [[FIRSTPHIBLOCK]] ]
 ; CHECK-NEXT:    br i1 undef, label [[FIRSTPHIBLOCK]], label [[THIRDPHIBLOCK]]
@@ -55,7 +55,7 @@ define void @fn2(i64 noundef %arg) {
 ; CHECK-NEXT:    br i1 false, label [[FIRSTPHIBLOCK:%.*]], label [[TEMP:%.*]]
 ; CHECK:       firstphiblock:
 ; CHECK-NEXT:    [[FIRSTPHI:%.*]] = phi i64 [ poison, [[IF_THEN]] ], [ [[SECONDPHI:%.*]], [[SECONDPHIBLOCK:%.*]] ]
-; CHECK-NEXT:    br i1 undef, label %for.cond17thread-pre-split, label [[SECONDPHIBLOCK]]
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND17THREAD_PRE_SPLIT:%.*]], label [[SECONDPHIBLOCK]]
 ; CHECK:       secondphiblock:
 ; CHECK-NEXT:    [[SECONDPHI]] = phi i64 [ [[THIRDPHI:%.*]], [[THIRDPHIBLOCK:%.*]] ], [ [[FIRSTPHI]], [[FIRSTPHIBLOCK]] ]
 ; CHECK-NEXT:    br i1 undef, label [[FIRSTPHIBLOCK]], label [[THIRDPHIBLOCK]]
@@ -65,7 +65,7 @@ define void @fn2(i64 noundef %arg) {
 ; CHECK:       for.cond17thread-pre-split:
 ; CHECK-NEXT:    br label [[COND_TRUE]]
 ; CHECK:       cond.true:
-; CHECK-NEXT:    [[FOURTHPHI:%.*]] = phi i64 [ [[ARG:%.*]], [[ENTRY:%.*]] ], [ [[FIRSTPHI]], %for.cond17thread-pre-split ]
+; CHECK-NEXT:    [[FOURTHPHI:%.*]] = phi i64 [ [[ARG:%.*]], [[ENTRY:%.*]] ], [ [[FIRSTPHI]], [[FOR_COND17THREAD_PRE_SPLIT]] ]
 ; CHECK-NEXT:    [[DIV]] = sdiv i64 [[FOURTHPHI]], 4
 ; CHECK-NEXT:    br label [[THIRDPHIBLOCK]]
 ; CHECK:       temp:
@@ -105,7 +105,7 @@ define void @fn3() {
 ; CHECK-NEXT:    [[F_0:%.*]] = phi ptr [ @b, [[ENTRY:%.*]] ], [ @a, [[L1_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond.loopexit:
-; CHECK-NEXT:    store i8 poison, ptr null
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    br i1 undef, label [[FOR_END14:%.*]], label [[FOR_COND1_PREHEADER:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/pr32845.ll b/llvm/test/Transforms/NewGVN/pr32845.ll
index d1182a6..29b81b8 100644
--- a/llvm/test/Transforms/NewGVN/pr32845.ll
+++ b/llvm/test/Transforms/NewGVN/pr32845.ll
@@ -13,7 +13,7 @@ define void @tinkywinky() {
 ; CHECK-NEXT:    [[F_0:%.*]] = phi ptr [ @b, [[ENTRY:%.*]] ], [ @a, [[L1_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond.loopexit:
-; CHECK-NEXT:    store i8 poison, ptr null
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    br i1 undef, label [[FOR_END14:%.*]], label [[FOR_COND1_PREHEADER:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/pr32852.ll b/llvm/test/Transforms/NewGVN/pr32852.ll
index 4fd5cf1..ad5badd 100644
--- a/llvm/test/Transforms/NewGVN/pr32852.ll
+++ b/llvm/test/Transforms/NewGVN/pr32852.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; Make sure GVN doesn't incorrectly think the branch terminating
 ; bb2 has a constant condition.
 ; RUN: opt -S -passes=newgvn %s | FileCheck %s
@@ -6,13 +7,26 @@
 @patatino = private unnamed_addr constant [3 x i8] c"0\0A\00"
 
 define void @tinkywinky() {
+; CHECK-LABEL: define void @tinkywinky() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge i32 [[TMP]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB2:%.*]], label [[BB7:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 (ptr, ...) @printf(ptr @patatino)
+; CHECK-NEXT:    br label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    ret void
+;
 bb:
   %tmp = load i32, ptr @a
   %tmp1 = icmp sge i32 %tmp, 0
   br i1 %tmp1, label %bb2, label %bb7
 bb2:
   %tmp4 = icmp sgt i32 %tmp, 0
-; CHECK: br i1 %tmp4, label %bb5, label %bb7
   br i1 %tmp4, label %bb5, label %bb7
 bb5:
   %tmp6 = call i32 (ptr, ...) @printf(ptr @patatino)
diff --git a/llvm/test/Transforms/NewGVN/pr32897.ll b/llvm/test/Transforms/NewGVN/pr32897.ll
index 35a3b00..881c3a8 100644
--- a/llvm/test/Transforms/NewGVN/pr32897.ll
+++ b/llvm/test/Transforms/NewGVN/pr32897.ll
@@ -6,7 +6,7 @@ define void @tinkywinky(ptr %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[BODY:%.*]]
 ; CHECK:       body:
-; CHECK-NEXT:    store i64 undef, ptr [[B:%.*]]
+; CHECK-NEXT:    store i64 undef, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    br label [[BODY]]
diff --git a/llvm/test/Transforms/NewGVN/pr32934.ll b/llvm/test/Transforms/NewGVN/pr32934.ll
index fa725f88..c8218c2 100644
--- a/llvm/test/Transforms/NewGVN/pr32934.ll
+++ b/llvm/test/Transforms/NewGVN/pr32934.ll
@@ -1,39 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S -passes=newgvn %s | FileCheck %s
 
-; CHECK: define void @tinkywinky() {
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   %d = alloca i32, align 4
-; CHECK-NEXT:   store i32 0, ptr null, align 4
-; CHECK-NEXT:   br label %for.cond
-; CHECK: for.cond:                                         ; preds = %if.end, %entry
-; CHECK-NEXT:   %0 = load i32, ptr null, align 4
-; CHECK-NEXT:   %cmp = icmp slt i32 %0, 1
-; CHECK-NEXT:   br i1 %cmp, label %for.body, label %while.cond
-; CHECK: for.body:                                         ; preds = %for.cond
-; CHECK-NEXT:   %1 = load i32, ptr @a, align 4
-; CHECK-NEXT:   store i32 %1, ptr %d, align 4
-; CHECK-NEXT:   br label %L
-; CHECK: L:                                                ; preds = %if.then, %for.body
-; CHECK-NEXT:   %tobool = icmp ne i32 %1, 0
-; CHECK-NEXT:   br i1 %tobool, label %if.then, label %if.end
-; CHECK: if.then:                                          ; preds = %L
-; CHECK-NEXT:   call void (ptr, ...) @printf(ptr @patatino)
-; CHECK-NEXT:   br label %L
-; CHECK: if.end:                                           ; preds = %L
-; CHECK-NEXT:   br label %for.cond
-; CHECK: while.cond:                                       ; preds = %while.body, %for.cond
-; CHECK-NEXT:   br i1 undef, label %while.body, label %while.end
-; CHECK: while.body:                                       ; preds = %while.cond
-; CHECK-NEXT:   call void (ptr, ...) @printf(ptr @patatino)
-; CHECK-NEXT:   br label %while.cond
-; CHECK: while.end:
-; CHECK-NEXT:   %2 = load i32, ptr @a, align 4
-; CHECK-NEXT:   store i32 %2, ptr undef, align 4
-; CHECK-NEXT:   ret void
 
 @a = external global i32, align 4
 @patatino = external unnamed_addr constant [2 x i8], align 1
 define void @tinkywinky() {
+; CHECK-LABEL: define void @tinkywinky() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[D:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, ptr null, align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[WHILE_COND:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[D]], align 4
+; CHECK-NEXT:    br label [[L:%.*]]
+; CHECK:       L:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void (ptr, ...) @printf(ptr @patatino)
+; CHECK-NEXT:    br label [[L]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    call void (ptr, ...) @printf(ptr @patatino)
+; CHECK-NEXT:    br label [[WHILE_COND]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    store i32 [[TMP2]], ptr undef, align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %d = alloca i32, align 4
   store i32 0, ptr null, align 4
diff --git a/llvm/test/Transforms/NewGVN/pr32945.ll b/llvm/test/Transforms/NewGVN/pr32945.ll
index ebf3813..7aabe4d 100644
--- a/llvm/test/Transforms/NewGVN/pr32945.ll
+++ b/llvm/test/Transforms/NewGVN/pr32945.ll
@@ -1,9 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S -passes=newgvn %s | FileCheck %s
-; CHECK-NOT: call i32 @llvm.ssa.copy
 
 @d = external global i32
 @e = external global i32
 define void @tinkywinky() {
+; CHECK-LABEL: define void @tinkywinky() {
+; CHECK-NEXT:    br i1 true, label [[LOR_LHS_FALSE:%.*]], label [[COND_TRUE:%.*]]
+; CHECK:       lor.lhs.false:
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr @d, align 4
+; CHECK-NEXT:    [[PATATINO:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[TMP]], [[PATATINO]]
+; CHECK-NEXT:    store i32 [[OR]], ptr @d, align 4
+; CHECK-NEXT:    br label [[COND_TRUE]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @e, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @d, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE6:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true6:
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[CMP7]], label [[COND_FALSE]], label [[COND_FALSE]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    ret void
+;
   br i1 true, label %lor.lhs.false, label %cond.true
 lor.lhs.false:
   %tmp = load i32, ptr @d, align 4
diff --git a/llvm/test/Transforms/NewGVN/pr32952.ll b/llvm/test/Transforms/NewGVN/pr32952.ll
index 5157bb2..49e4843 100644
--- a/llvm/test/Transforms/NewGVN/pr32952.ll
+++ b/llvm/test/Transforms/NewGVN/pr32952.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; PR32952: Don't erroneously consider congruent two phi nodes which
 ; have the same arguments but different incoming edges.
 ; RUN: opt -passes=newgvn -S %s | FileCheck %s
@@ -6,6 +7,31 @@
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
 define i32 @tinkywinky() {
+; CHECK-LABEL: define i32 @tinkywinky() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @a, align 2
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[NEG:%.*]] = xor i32 [[CONV]], -1
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[NEG]] to i16
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[CONV]], [[CONV3]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[TINKY:%.*]], label [[WINKY:%.*]]
+; CHECK:       tinky:
+; CHECK-NEXT:    store i16 2, ptr @a, align 2
+; CHECK-NEXT:    br label [[PATATINO:%.*]]
+; CHECK:       winky:
+; CHECK-NEXT:    br label [[PATATINO]]
+; CHECK:       patatino:
+; CHECK-NEXT:    [[MEH:%.*]] = phi i16 [ [[TMP0]], [[WINKY]] ], [ [[CONV1]], [[TINKY]] ]
+; CHECK-NEXT:    [[BANANA:%.*]] = phi i16 [ [[TMP0]], [[TINKY]] ], [ [[CONV1]], [[WINKY]] ]
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PROMOTED:%.*]] = zext i16 [[BANANA]] to i32
+; CHECK-NEXT:    [[OTHER:%.*]] = zext i16 [[MEH]] to i32
+; CHECK-NEXT:    [[FIRST:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str, i32 [[PROMOTED]])
+; CHECK-NEXT:    [[SECOND:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str, i32 [[OTHER]])
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %0 = load i16, ptr @a, align 2
   %conv = sext i16 %0 to i32
@@ -23,15 +49,11 @@ winky:
   br label %patatino
 
 patatino:
-; CHECK: %meh = phi i16 [ %0, %winky ], [ %conv1, %tinky ]
-; CHECK: %banana = phi i16 [ %0, %tinky ], [ %conv1, %winky ]
   %meh = phi i16 [ %0, %winky ], [ %conv1, %tinky ]
   %banana = phi i16 [ %0, %tinky ], [ %conv1, %winky ]
   br label %end
 
 end:
-; CHECK: %promoted = zext i16 %banana to i32
-; CHECK: %other = zext i16 %meh to i32
   %promoted = zext i16 %banana to i32
   %other = zext i16 %meh to i32
   %first = tail call i32 (ptr, ...) @printf(ptr @.str, i32 %promoted)
diff --git a/llvm/test/Transforms/NewGVN/pr33014.ll b/llvm/test/Transforms/NewGVN/pr33014.ll
index f6e9197..04f9df2 100644
--- a/llvm/test/Transforms/NewGVN/pr33014.ll
+++ b/llvm/test/Transforms/NewGVN/pr33014.ll
@@ -1,33 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; Make sure we don't end up in an infinite recursion in singleReachablePHIPath().
 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 @c = external global i64, align 8
 
-; CHECK-LABEL: define void @tinkywinky() {
-; CHECK: entry:
-; CHECK-NEXT:   br i1 undef, label %l2, label %if.then
-; CHECK: if.then:                                          ; preds = %entry
-; CHECK-NEXT:   br label %for.body
-; CHECK: ph:                                               ; preds = %back, %ontrue
-; CHECK-NEXT:   br label %for.body
-; CHECK: for.body:                                         ; preds = %ph, %if.then
-; CHECK-NEXT:   br i1 undef, label %ontrue, label %onfalse
-; CHECK: onfalse:                                          ; preds = %for.body
-; CHECK-NEXT:   %patatino = load i64, ptr @c
-; CHECK-NEXT:   ret void
-; CHECK: ontrue:                                           ; preds = %for.body
-; CHECK-NEXT:   %dipsy = load i64, ptr @c
-; CHECK-NEXT:   br label %ph
-; CHECK: back:                                             ; preds = %l2
-; CHECK-NEXT:   store i8 poison, ptr null
-; CHECK-NEXT:   br label %ph
-; CHECK: end:                                              ; preds = %l2
-; CHECK-NEXT:   ret void
-; CHECK: l2:                                               ; preds = %entry
-; CHECK-NEXT:   br i1 false, label %back, label %end
-; CHECK-NEXT: }
 
 define void @tinkywinky() {
+; CHECK-LABEL: define void @tinkywinky() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[L2:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       ph:
+; CHECK-NEXT:    br label [[FOR_BODY]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br i1 undef, label [[ONTRUE:%.*]], label [[ONFALSE:%.*]]
+; CHECK:       onfalse:
+; CHECK-NEXT:    [[PATATINO:%.*]] = load i64, ptr @c, align 4
+; CHECK-NEXT:    ret void
+; CHECK:       ontrue:
+; CHECK-NEXT:    [[DIPSY:%.*]] = load i64, ptr @c, align 4
+; CHECK-NEXT:    br label [[PH:%.*]]
+; CHECK:       back:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br label [[PH]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+; CHECK:       l2:
+; CHECK-NEXT:    br i1 false, label [[BACK:%.*]], label [[END:%.*]]
+;
 entry:
   br i1 undef, label %l2, label %if.then
 if.then:
diff --git a/llvm/test/Transforms/NewGVN/pr33086.ll b/llvm/test/Transforms/NewGVN/pr33086.ll
index 54802cd..ab6c00d 100644
--- a/llvm/test/Transforms/NewGVN/pr33086.ll
+++ b/llvm/test/Transforms/NewGVN/pr33086.ll
@@ -1,31 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S %s | FileCheck %s
 ; REQUIRES: asserts
 
-; CHECK-LABEL: define void @tinkywinky() {
-; CHECK: entry:
-; CHECK-NEXT:   br i1 undef, label %for.cond18, label %for.cond.preheader
-; CHECK: for.cond.preheader:
-; CHECK-NEXT:   br label %for.cond2thread-pre-split
-; CHECK: for.cond2thread-pre-split:
-; CHECK-NEXT:   %conv24 = phi i32 [ 0, %for.cond.preheader ], [ %conv, %for.inc.split ]
-; CHECK-NEXT:   br label %for.inc.split
-; CHECK: for.inc.split:
-; CHECK-NEXT:   %add = shl nsw i32 %conv24, 16
-; CHECK-NEXT:   %sext23 = add i32 %add, 65536
-; CHECK-NEXT:   %conv = ashr exact i32 %sext23, 16
-; CHECK-NEXT:   %cmp = icmp slt i32 %sext23, 3604480
-; CHECK-NEXT:   br i1 %cmp, label %for.cond2thread-pre-split, label %l1.loopexit
-; CHECK: l1.loopexit:
-; CHECK-NEXT:   br label %l1
-; CHECK: l1:
-; CHECK-NEXT:   %0 = load i16, ptr null, align 2
-; CHECK-NEXT:   %g.0.g.0..pr = load i16, ptr null, align 2
-; CHECK-NEXT:   ret void
-; CHECK: for.cond18:
-; CHECK-NEXT:   br label %l1
-; CHECK-NEXT: }
 
 define void @tinkywinky() {
+; CHECK-LABEL: define void @tinkywinky() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND18:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; CHECK:       for.cond.preheader:
+; CHECK-NEXT:    br label [[FOR_COND2THREAD_PRE_SPLIT:%.*]]
+; CHECK:       for.cond2thread-pre-split:
+; CHECK-NEXT:    [[CONV24:%.*]] = phi i32 [ 0, [[FOR_COND_PREHEADER]] ], [ [[CONV:%.*]], [[FOR_INC_SPLIT:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_INC_SPLIT]]
+; CHECK:       for.inc.split:
+; CHECK-NEXT:    [[ADD:%.*]] = shl nsw i32 [[CONV24]], 16
+; CHECK-NEXT:    [[SEXT23:%.*]] = add i32 [[ADD]], 65536
+; CHECK-NEXT:    [[CONV]] = ashr exact i32 [[SEXT23]], 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[SEXT23]], 3604480
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND2THREAD_PRE_SPLIT]], label [[L1_LOOPEXIT:%.*]]
+; CHECK:       l1.loopexit:
+; CHECK-NEXT:    br label [[L1:%.*]]
+; CHECK:       l1:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[G_0_G_0__PR:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    ret void
+; CHECK:       for.cond18:
+; CHECK-NEXT:    br label [[L1]]
+;
 entry:
   br i1 undef, label %for.cond18, label %for.cond.preheader
 
diff --git a/llvm/test/Transforms/NewGVN/pr33116.ll b/llvm/test/Transforms/NewGVN/pr33116.ll
index f5ef3ae..6609ef9 100644
--- a/llvm/test/Transforms/NewGVN/pr33116.ll
+++ b/llvm/test/Transforms/NewGVN/pr33116.ll
@@ -13,7 +13,7 @@ define void @b() {
 ; CHECK:       c:
 ; CHECK-NEXT:    br i1 undef, label [[IF_G:%.*]], label [[IF_E]]
 ; CHECK:       if.g:
-; CHECK-NEXT:    store i32 undef, ptr @a
+; CHECK-NEXT:    store i32 undef, ptr @a, align 4
 ; CHECK-NEXT:    br label [[WHILE_D]]
 ; CHECK:       if.e:
 ; CHECK-NEXT:    br label [[F]]
diff --git a/llvm/test/Transforms/NewGVN/pr33187.ll b/llvm/test/Transforms/NewGVN/pr33187.ll
index e5c3da2..37668bb 100644
--- a/llvm/test/Transforms/NewGVN/pr33187.ll
+++ b/llvm/test/Transforms/NewGVN/pr33187.ll
@@ -30,7 +30,7 @@ define void @fn1() local_unnamed_addr #0 {
 ; CHECK:       while.body12:
 ; CHECK-NEXT:    br i1 undef, label [[IF_END18]], label [[L]]
 ; CHECK:       L.loopexit:
-; CHECK-NEXT:    store i8 poison, ptr null
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    br label [[L]]
 ; CHECK:       L:
 ; CHECK-NEXT:    [[H_125]] = phi i32 [ [[H_127]], [[WHILE_BODY12]] ], [ poison, [[L_LOOPEXIT]] ]
@@ -114,13 +114,13 @@ attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="fals
 define void @a() {
 ; CHECK-LABEL: @a(
 ; CHECK-NEXT:  b:
-; CHECK-NEXT:    store ptr null, ptr null
+; CHECK-NEXT:    store ptr null, ptr null, align 8
 ; CHECK-NEXT:    br label [[D:%.*]]
 ; CHECK:       d:
 ; CHECK-NEXT:    [[I:%.*]] = phi ptr [ null, [[B:%.*]] ], [ [[E:%.*]], [[F:%.*]] ]
 ; CHECK-NEXT:    br i1 undef, label [[F]], label [[G:%.*]]
 ; CHECK:       g:
-; CHECK-NEXT:    store ptr [[I]], ptr null
+; CHECK-NEXT:    store ptr [[I]], ptr null, align 8
 ; CHECK-NEXT:    unreachable
 ; CHECK:       f:
 ; CHECK-NEXT:    [[E]] = getelementptr i8, ptr [[I]], i64 1
diff --git a/llvm/test/Transforms/NewGVN/pr33196.ll b/llvm/test/Transforms/NewGVN/pr33196.ll
index c312d5e..c04b895 100644
--- a/llvm/test/Transforms/NewGVN/pr33196.ll
+++ b/llvm/test/Transforms/NewGVN/pr33196.ll
@@ -1,33 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S -passes=newgvn %s | FileCheck %s
 
-; CHECK: define i32 @main() {
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   %tmp = load i32, ptr @d, align 4
-; CHECK-NEXT:   %tmp1 = load i32, ptr @c, align 4
-; CHECK-NEXT:   %tobool = icmp eq i32 %tmp1, -1
-; CHECK-NEXT:   br i1 %tobool, label %if.end, label %if.then
-; CHECK: if.then:
-; CHECK-NEXT:   br label %L
-; CHECK: L:
-; CHECK-NEXT:   %e.0 = phi i32 [ 0, %if.then ], [ %e.1, %if.then4 ]
-; CHECK-NEXT:   br label %if.end
-; CHECK: if.end:
-; CHECK-NEXT:   %e.1 = phi i32 [ %e.0, %L ], [ %tmp, %entry ]
-; CHECK-NEXT:   store i32 %e.1, ptr @a, align 4
-; CHECK-NEXT:   %tmp2 = load i32, ptr @b, align 4
-; CHECK-NEXT:   store i32 0, ptr @b, align 4
-; CHECK-NEXT:   %sext = shl i32 %tmp2, 16
-; CHECK-NEXT:   %conv1 = ashr exact i32 %sext, 16
-; CHECK-NEXT:   %add = add nsw i32 %conv1, %tmp1
-; CHECK-NEXT:   %add2 = add nsw i32 %add, %e.1
-; CHECK-NEXT:   store i32 %add2, ptr @a, align 4
-; CHECK-NEXT:   %tobool3 = icmp eq i32 %add2, 0
-; CHECK-NEXT:   br i1 %tobool3, label %if.end5, label %if.then4
-; CHECK: if.then4:
-; CHECK-NEXT:   br label %L
-; CHECK: if.end5:
-; CHECK-NEXT:   ret i32 0
-; CHECK-NEXT: }
 
 @d = global i32 1, align 4
 @c = common global i32 0, align 4
@@ -35,6 +8,34 @@
 @b = common global i32 0, align 4
 
 define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr @d, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], -1
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[L:%.*]]
+; CHECK:       L:
+; CHECK-NEXT:    [[E_0:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[E_1:%.*]], [[IF_THEN4:%.*]] ]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[E_1]] = phi i32 [ [[E_0]], [[L]] ], [ [[TMP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 [[E_1]], ptr @a, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    store i32 0, ptr @b, align 4
+; CHECK-NEXT:    [[SEXT:%.*]] = shl i32 [[TMP2]], 16
+; CHECK-NEXT:    [[CONV1:%.*]] = ashr exact i32 [[SEXT]], 16
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV1]], [[TMP1]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[E_1]]
+; CHECK-NEXT:    store i32 [[ADD2]], ptr @a, align 4
+; CHECK-NEXT:    [[TOBOOL3:%.*]] = icmp eq i32 [[ADD2]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL3]], label [[IF_END5:%.*]], label [[IF_THEN4]]
+; CHECK:       if.then4:
+; CHECK-NEXT:    br label [[L]]
+; CHECK:       if.end5:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %tmp = load i32, ptr @d, align 4
   %tmp1 = load i32, ptr @c, align 4
diff --git a/llvm/test/Transforms/NewGVN/pr33204.ll b/llvm/test/Transforms/NewGVN/pr33204.ll
index 99c4824..482e35e 100644
--- a/llvm/test/Transforms/NewGVN/pr33204.ll
+++ b/llvm/test/Transforms/NewGVN/pr33204.ll
@@ -20,10 +20,10 @@ define void @hoge(i32 %arg) {
 ; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ 0, [[BB1:%.*]] ], [ [[ARG:%.*]], [[BB:%.*]] ]
 ; CHECK-NEXT:    br label [[BB6:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr @global, align 4, !h !0
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr @global, align 4, !h [[META0:![0-9]+]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       bb6:
-; CHECK-NEXT:    store i32 [[TMP]], ptr @global.1, align 4, !h !0
+; CHECK-NEXT:    store i32 [[TMP]], ptr @global.1, align 4, !h [[META0]]
 ; CHECK-NEXT:    br i1 undef, label [[BB7:%.*]], label [[BB1]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    br i1 undef, label [[BB10:%.*]], label [[BB8:%.*]]
@@ -33,7 +33,7 @@ define void @hoge(i32 %arg) {
 ; CHECK-NEXT:    store i8 poison, ptr null, align 1
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    store i32 0, ptr @global, align 4, !h !0
+; CHECK-NEXT:    store i32 0, ptr @global, align 4, !h [[META0]]
 ; CHECK-NEXT:    br label [[BB7]]
 ;
 bb:
diff --git a/llvm/test/Transforms/NewGVN/pr33305.ll b/llvm/test/Transforms/NewGVN/pr33305.ll
index f87cf08..3a19f61 100644
--- a/llvm/test/Transforms/NewGVN/pr33305.ll
+++ b/llvm/test/Transforms/NewGVN/pr33305.ll
@@ -19,14 +19,14 @@ target triple = "x86_64-apple-macosx10.12.0"
 define i32 @main() local_unnamed_addr #0 {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTPR_I:%.*]] = load i32, ptr @c, align 4, !tbaa !3
+; CHECK-NEXT:    [[DOTPR_I:%.*]] = load i32, ptr @c, align 4, !tbaa [[TBAA3:![0-9]+]]
 ; CHECK-NEXT:    [[CMP13_I:%.*]] = icmp slt i32 [[DOTPR_I]], 1
 ; CHECK-NEXT:    br i1 [[CMP13_I]], label [[FOR_COND1_PREHEADER_LR_PH_I:%.*]], label [[ENTRY_FOR_END9_I_CRIT_EDGE:%.*]]
 ; CHECK:       entry.for.end9.i_crit_edge:
-; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr @h, align 4, !tbaa !3
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr @h, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    br label [[FOR_END9_I:%.*]]
 ; CHECK:       for.cond1.preheader.lr.ph.i:
-; CHECK-NEXT:    [[G_PROMOTED14_I:%.*]] = load i32, ptr @g, align 4, !tbaa !3
+; CHECK-NEXT:    [[G_PROMOTED14_I:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER_I:%.*]]
 ; CHECK:       for.cond1.preheader.i:
 ; CHECK-NEXT:    [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], [[FOR_INC7_I:%.*]] ]
@@ -42,9 +42,9 @@ define i32 @main() local_unnamed_addr #0 {
 ; CHECK:       lor.rhs.i:
 ; CHECK-NEXT:    [[LNOT_I:%.*]] = xor i1 [[TOBOOL_I]], true
 ; CHECK-NEXT:    [[LNOT_EXT_I:%.*]] = zext i1 [[LNOT_I]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @e, align 4, !tbaa !3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @e, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[TMP3]], [[LNOT_EXT_I]]
-; CHECK-NEXT:    store i32 [[XOR_I]], ptr @e, align 4, !tbaa !3
+; CHECK-NEXT:    store i32 [[XOR_I]], ptr @e, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    br label [[LOR_END_I]]
 ; CHECK:       lor.end.i:
 ; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[INC12_I]], 1
@@ -55,28 +55,28 @@ define i32 @main() local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[INC816_I]], 0
 ; CHECK-NEXT:    br i1 [[CMP_I]], label [[FOR_COND1_PREHEADER_I]], label [[FOR_COND_FOR_END9_CRIT_EDGE_I:%.*]]
 ; CHECK:       for.cond.for.end9_crit_edge.i:
-; CHECK-NEXT:    store i32 0, ptr @g, align 4, !tbaa !3
-; CHECK-NEXT:    store i32 2, ptr @h, align 4, !tbaa !3
-; CHECK-NEXT:    store i32 [[INC8_I]], ptr @c, align 4, !tbaa !3
+; CHECK-NEXT:    store i32 0, ptr @g, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 2, ptr @h, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 [[INC8_I]], ptr @c, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    br label [[FOR_END9_I]]
 ; CHECK:       for.end9.i:
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], [[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, [[FOR_COND_FOR_END9_CRIT_EDGE_I]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr @b, align 8, !tbaa !7
-; CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4, !tbaa !3
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr @e, align 4, !tbaa !3
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr @b, align 8, !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr @e, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[CMP10_I:%.*]] = icmp slt i32 [[TMP6]], -1
 ; CHECK-NEXT:    br i1 [[CMP10_I]], label [[IF_THEN_I:%.*]], label [[FN1_EXIT:%.*]]
 ; CHECK:       if.then.i:
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @f, align 4, !tbaa !3
-; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP5]], align 4, !tbaa !3
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @f, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP5]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    br label [[FN1_EXIT]]
 ; CHECK:       fn1.exit:
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr @a, align 4, !tbaa !3
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr @a, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[PUTS2:%.*]] = tail call i32 @puts(ptr @str.2)
-; CHECK-NEXT:    tail call void @abort()
+; CHECK-NEXT:    tail call void @abort() #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[PUTS:%.*]] = tail call i32 @puts(ptr @str)
diff --git a/llvm/test/Transforms/NewGVN/pr33367.ll b/llvm/test/Transforms/NewGVN/pr33367.ll
index dc5d190..597caa2 100644
--- a/llvm/test/Transforms/NewGVN/pr33367.ll
+++ b/llvm/test/Transforms/NewGVN/pr33367.ll
@@ -11,19 +11,19 @@ define %MNR_struct @f000316011717_2(ptr %pDS, ptr %pCG) #2 {
 ; CHECK-NEXT:  Entry:
 ; CHECK-NEXT:    [[RESTART:%.*]] = alloca [[MNR_STRUCT:%.*]], align 8
 ; CHECK-NEXT:    [[PCARRY:%.*]] = getelementptr [[DS_STRUCT:%.*]], ptr [[PDS:%.*]], i32 0, i32 1
-; CHECK-NEXT:    [[BASE:%.*]] = load ptr, ptr [[PDS]], align 8, !tbaa !14
+; CHECK-NEXT:    [[BASE:%.*]] = load ptr, ptr [[PDS]], align 8, !tbaa [[TBAA14:![0-9]+]]
 ; CHECK-NEXT:    [[ABSADDR:%.*]] = getelementptr i64, ptr [[BASE]], i64 9
-; CHECK-NEXT:    [[EXTARGET:%.*]] = load i64, ptr [[ABSADDR]], align 8, !tbaa !4
+; CHECK-NEXT:    [[EXTARGET:%.*]] = load i64, ptr [[ABSADDR]], align 8, !tbaa [[TBAA4:![0-9]+]]
 ; CHECK-NEXT:    [[TEMPLATE:%.*]] = icmp eq i64 [[EXTARGET]], 8593987412
 ; CHECK-NEXT:    br i1 [[TEMPLATE]], label %"BB3.000316011731#1", label [[BB2_000316011731_5:%.*]]
 ; CHECK:       "BB3.000316011731#1":
 ; CHECK-NEXT:    [[PBASE8:%.*]] = getelementptr [32 x ptr], ptr [[PDS]], i64 0, i64 29
-; CHECK-NEXT:    [[BASE9:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa !14
+; CHECK-NEXT:    [[BASE9:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[TBAA14]]
 ; CHECK-NEXT:    [[ABSADDR1:%.*]] = getelementptr i64, ptr [[BASE9]], i64 7
-; CHECK-NEXT:    [[RMEM:%.*]] = load i64, ptr [[ABSADDR1]], align 8, !tbaa !4
+; CHECK-NEXT:    [[RMEM:%.*]] = load i64, ptr [[ABSADDR1]], align 8, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    [[PWT:%.*]] = getelementptr [[DS_STRUCT]], ptr [[PDS]], i32 0, i32 2
 ; CHECK-NEXT:    [[PWTE:%.*]] = getelementptr [32 x i16], ptr [[PWT]], i64 0, i64 8593987412
-; CHECK-NEXT:    [[SHIFTS:%.*]] = load i16, ptr [[PWTE]], align 2, !tbaa !18, !invariant.load !20
+; CHECK-NEXT:    [[SHIFTS:%.*]] = load i16, ptr [[PWTE]], align 2, !tbaa [[TBAA18:![0-9]+]], !invariant.load [[META20:![0-9]+]]
 ; CHECK-NEXT:    [[SLOWJ:%.*]] = icmp eq i16 [[SHIFTS]], 0
 ; CHECK-NEXT:    br i1 [[SLOWJ]], label [[BB2_000316011731_5]], label %"BB3.000316011731#1.1"
 ; CHECK:       BB2.000316011731.5:
@@ -34,22 +34,22 @@ define %MNR_struct @f000316011717_2(ptr %pDS, ptr %pCG) #2 {
 ; CHECK-NEXT:    [[SHIFTS1:%.*]] = zext i16 [[SHIFTS]] to i64
 ; CHECK-NEXT:    [[VAL:%.*]] = call i64 @llvm.x86.bmi.bextr.64(i64 [[RMEM]], i64 [[SHIFTS1]])
 ; CHECK-NEXT:    [[PREG:%.*]] = getelementptr [64 x i64], ptr [[PCG:%.*]], i64 0, i64 12
-; CHECK-NEXT:    store i64 [[VAL]], ptr [[PREG]], align 32, !tbaa !10
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[PREG]], align 32, !tbaa [[TBAA10:![0-9]+]]
 ; CHECK-NEXT:    [[PREG2:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 14
-; CHECK-NEXT:    [[REG:%.*]] = load i64, ptr [[PREG2]], align 16, !tbaa !12
-; CHECK-NEXT:    [[BASE2:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa !14
+; CHECK-NEXT:    [[REG:%.*]] = load i64, ptr [[PREG2]], align 16, !tbaa [[TBAA12:![0-9]+]]
+; CHECK-NEXT:    [[BASE2:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[TBAA14]]
 ; CHECK-NEXT:    [[ABSADDR2:%.*]] = getelementptr i64, ptr [[BASE2]], i64 [[REG]]
-; CHECK-NEXT:    [[RMEM2:%.*]] = load i64, ptr [[ABSADDR2]], align 8, !tbaa !1
+; CHECK-NEXT:    [[RMEM2:%.*]] = load i64, ptr [[ABSADDR2]], align 8, !tbaa [[TBAA1:![0-9]+]]
 ; CHECK-NEXT:    [[PREG7:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 9
-; CHECK-NEXT:    store i64 [[RMEM2]], ptr [[PREG7]], align 8, !tbaa !8
+; CHECK-NEXT:    store i64 [[RMEM2]], ptr [[PREG7]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; CHECK-NEXT:    [[ADD2C279:%.*]] = add i64 [[RMEM2]], [[VAL]]
 ; CHECK-NEXT:    [[CCHK:%.*]] = icmp sge i64 [[ADD2C279]], 0
 ; CHECK-NEXT:    [[CFL:%.*]] = zext i1 [[CCHK]] to i8
-; CHECK-NEXT:    store i8 [[CFL]], ptr [[PCARRY]], align 1, !tbaa !16
+; CHECK-NEXT:    store i8 [[CFL]], ptr [[PCARRY]], align 1, !tbaa [[TBAA16:![0-9]+]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       Exit:
 ; CHECK-NEXT:    [[RESTART378:%.*]] = load [[MNR_STRUCT]], ptr [[RESTART]], align 8
-; CHECK-NEXT:    ret [[MNR_STRUCT]] %restart378
+; CHECK-NEXT:    ret [[MNR_STRUCT]] [[RESTART378]]
 ;
 Entry:
   %restart = alloca %MNR_struct
diff --git a/llvm/test/Transforms/NewGVN/pr34452.ll b/llvm/test/Transforms/NewGVN/pr34452.ll
index f5d5fda..9e65349 100644
--- a/llvm/test/Transforms/NewGVN/pr34452.ll
+++ b/llvm/test/Transforms/NewGVN/pr34452.ll
@@ -9,7 +9,7 @@ source_filename = "bugpoint-output-09f7a24.bc"
 define void @sgrep() local_unnamed_addr #0 {
 ; CHECK-LABEL: @sgrep(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @WHOLELINE, align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @WHOLELINE, align 4, !tbaa [[TBAA1:![0-9]+]]
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2048, i32 2047
 ; CHECK-NEXT:    br label [[WHILE_BODY_US:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/pr42422-phi-of-ops.ll b/llvm/test/Transforms/NewGVN/pr42422-phi-of-ops.ll
index cbdf209..1312f9f 100644
--- a/llvm/test/Transforms/NewGVN/pr42422-phi-of-ops.ll
+++ b/llvm/test/Transforms/NewGVN/pr42422-phi-of-ops.ll
@@ -40,8 +40,8 @@ define void @d() {
 ; CHECK:       cleanup:
 ; CHECK-NEXT:    [[CLEANUP_DEST:%.*]] = phi i32 [ poison, [[IF_END12]] ], [ 1, [[IF_THEN11]] ], [ 9, [[IF_THEN]] ]
 ; CHECK-NEXT:    switch i32 [[CLEANUP_DEST]], label [[CLEANUP14]] [
-; CHECK-NEXT:    i32 0, label [[FOR_COND4]]
-; CHECK-NEXT:    i32 9, label [[FOR_END13:%.*]]
+; CHECK-NEXT:      i32 0, label [[FOR_COND4]]
+; CHECK-NEXT:      i32 9, label [[FOR_END13:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       for.end13:
 ; CHECK-NEXT:    br label [[CLEANUP14]]
diff --git a/llvm/test/Transforms/NewGVN/pr43441.ll b/llvm/test/Transforms/NewGVN/pr43441.ll
index 5c4a9c3..a5f711d 100644
--- a/llvm/test/Transforms/NewGVN/pr43441.ll
+++ b/llvm/test/Transforms/NewGVN/pr43441.ll
@@ -1,15 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK-LABEL: @print_long_format()
 define dso_local void @print_long_format() #0 {
+; CHECK-LABEL: define dso_local void @print_long_format(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 undef, label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i32 1, label [[SW_BB:%.*]]
+; CHECK-NEXT:      i32 0, label [[SW_BB19:%.*]]
+; CHECK-NEXT:      i32 2, label [[SW_BB23:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    unreachable
+; CHECK:       sw.bb19:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN37:%.*]], label [[IF_END50:%.*]]
+; CHECK:       sw.bb23:
+; CHECK-NEXT:    unreachable
+; CHECK:       sw.default:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.then37:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end50:
+; CHECK-NEXT:    [[CALL180:%.*]] = call i32 @timespec_cmp() #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret void
+;
 entry:
   switch i32 undef, label %sw.default [
-    i32 1, label %sw.bb
-    i32 0, label %sw.bb19
-    i32 2, label %sw.bb23
+  i32 1, label %sw.bb
+  i32 0, label %sw.bb19
+  i32 2, label %sw.bb23
   ]
 
 sw.bb:                                            ; preds = %entry
diff --git a/llvm/test/Transforms/NewGVN/pre-compare.ll b/llvm/test/Transforms/NewGVN/pre-compare.ll
index 9fd20fc..8e4e5f8 100644
--- a/llvm/test/Transforms/NewGVN/pre-compare.ll
+++ b/llvm/test/Transforms/NewGVN/pre-compare.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 ; C source:
@@ -37,6 +38,28 @@
 @.str3 = private unnamed_addr constant [12 x i8] c"step 2: %d\0A\00", align 1
 
 define void @f(i32 %x) noreturn nounwind uwtable ssp {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND_PREHEADER:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[X]], 2
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP1]], ptr @.str, ptr @.str1
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @puts(ptr [[COND]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    br label [[FOR_COND_PREHEADER]]
+; CHECK:       for.cond.preheader:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[X]], 2
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @puts(ptr @.str2) #[[ATTR1]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[FOR_COND_BACKEDGE:%.*]], label [[IF_END5:%.*]]
+; CHECK:       if.end5:
+; CHECK-NEXT:    [[CALL6:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str3, i32 [[X]]) #[[ATTR1]]
+; CHECK-NEXT:    br label [[FOR_COND_BACKEDGE]]
+; CHECK:       for.cond.backedge:
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
 entry:
   %cmp = icmp eq i32 %x, 1
   br i1 %cmp, label %for.cond.preheader, label %if.then
diff --git a/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll b/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll
index 1ca24af..a63ca13 100644
--- a/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll
+++ b/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll
@@ -9,7 +9,7 @@ declare void @use(i32)
 define i32 @test(ptr %p1, ptr %p2, i1 %c) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[P1:%.*]], align 8, !tbaa !0
+; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[P1:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp slt i32 [[LV]], 1
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[IF_FALSE:%.*]]
 ; CHECK:       if.false:
diff --git a/llvm/test/Transforms/NewGVN/readattrs.ll b/llvm/test/Transforms/NewGVN/readattrs.ll
index 049a2fc..544fe45 100644
--- a/llvm/test/Transforms/NewGVN/readattrs.ll
+++ b/llvm/test/Transforms/NewGVN/readattrs.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S -o - < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
@@ -6,12 +7,15 @@ target triple = "x86_64-unknown-linux-gnu"
 declare void @use(ptr readonly nocapture)
 
 define i8 @test() {
+; CHECK-LABEL: define i8 @test() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 1, ptr [[A]], align 1
+; CHECK-NEXT:    call void @use(ptr [[A]])
+; CHECK-NEXT:    ret i8 1
+;
   %a = alloca i8
   store i8 1, ptr %a
   call void @use(ptr %a)
   %b = load i8, ptr %a
   ret i8 %b
-; CHECK-LABEL: define i8 @test(
-; CHECK: call void @use(ptr %a)
-; CHECK-NEXT: ret i8 1
 }
diff --git a/llvm/test/Transforms/NewGVN/rle-nonlocal.ll b/llvm/test/Transforms/NewGVN/rle-nonlocal.ll
index c2fb391..efdfd1f 100644
--- a/llvm/test/Transforms/NewGVN/rle-nonlocal.ll
+++ b/llvm/test/Transforms/NewGVN/rle-nonlocal.ll
@@ -7,14 +7,14 @@ define i32 @main(ptr %p, i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P:%.*]], align 8
 ; CHECK-NEXT:    br label [[BLOCK4:%.*]]
 ; CHECK:       block3:
-; CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[P]]
+; CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[P]], align 8
 ; CHECK-NEXT:    br label [[BLOCK4]]
 ; CHECK:       block4:
 ; CHECK-NEXT:    [[EXISTINGPHI:%.*]] = phi ptr [ [[A]], [[BLOCK2]] ], [ [[B]], [[BLOCK3]] ]
-; CHECK-NEXT:    [[C:%.*]] = load i32, ptr [[EXISTINGPHI]]
+; CHECK-NEXT:    [[C:%.*]] = load i32, ptr [[EXISTINGPHI]], align 4
 ; CHECK-NEXT:    [[E:%.*]] = add i32 [[C]], [[C]]
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
diff --git a/llvm/test/Transforms/NewGVN/rle.ll b/llvm/test/Transforms/NewGVN/rle.ll
index 1cfdc83..950c492 100644
--- a/llvm/test/Transforms/NewGVN/rle.ll
+++ b/llvm/test/Transforms/NewGVN/rle.ll
@@ -1,15 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes=newgvn,dce -S | FileCheck %s
 ; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes=newgvn,dce -S | FileCheck %s
 ; memset -> i16 forwarding.
 define signext i16 @memset_to_i16_local(ptr %A) nounwind ssp {
+; CHECK-LABEL: define signext i16 @memset_to_i16_local(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    ret i16 257
+;
 entry:
   tail call void @llvm.memset.p0.i64(ptr %A, i8 1, i64 200, i1 false)
   %arrayidx = getelementptr inbounds i16, ptr %A, i64 42
   %tmp2 = load i16, ptr %arrayidx
   ret i16 %tmp2
-; CHECK-LABEL: @memset_to_i16_local(
-; CHECK-NOT: load
-; CHECK: ret i16 257
 }
 
 @GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }
@@ -17,37 +21,48 @@ entry:
 
 ; memset -> float forwarding.
 define float @memcpy_to_float_local(ptr %A) nounwind ssp {
+; CHECK-LABEL: define float @memcpy_to_float_local(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false)
+; CHECK-NEXT:    ret float 1.400000e+01
+;
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr %A, ptr @GCst, i64 12, i1 false)
   %arrayidx = getelementptr inbounds float, ptr %A, i64 1 ; <ptr> [#uses=1]
   %tmp2 = load float, ptr %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
-; CHECK-LABEL: @memcpy_to_float_local(
-; CHECK-NOT: load
-; CHECK: ret float 1.400000e+01
 }
 ; memcpy from address space 1
 define float @memcpy_to_float_local_as1(ptr %A) nounwind ssp {
+; CHECK-LABEL: define float @memcpy_to_float_local_as1(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false)
+; CHECK-NEXT:    ret float 1.400000e+01
+;
 entry:
   tail call void @llvm.memcpy.p0.p1.i64(ptr %A, ptr addrspace(1) @GCst_as1, i64 12, i1 false)
   %arrayidx = getelementptr inbounds float, ptr %A, i64 1 ; <ptr> [#uses=1]
   %tmp2 = load float, ptr %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
-; CHECK-LABEL: @memcpy_to_float_local_as1(
-; CHECK-NOT: load
-; CHECK: ret float 1.400000e+01
 }
 
 ; PR6642
 define i32 @memset_to_load() nounwind readnone {
+; CHECK-LABEL: define i32 @memset_to_load(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = alloca [256 x i32], align 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false)
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %x = alloca [256 x i32], align 4                ; <ptr> [#uses=2]
   call void @llvm.memset.p0.i64(ptr align 4 %x, i8 0, i64 1024, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32], ptr %x, i32 0, i32 0 ; <ptr>
   %tmp1 = load i32, ptr %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
-; CHECK-LABEL: @memset_to_load(
-; CHECK: ret i32 0
 }
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 
diff --git a/llvm/test/Transforms/NewGVN/simp-to-self.ll b/llvm/test/Transforms/NewGVN/simp-to-self.ll
index fb8a019..f9a0ec2 100644
--- a/llvm/test/Transforms/NewGVN/simp-to-self.ll
+++ b/llvm/test/Transforms/NewGVN/simp-to-self.ll
@@ -1,13 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S < %s -passes=newgvn | FileCheck %s
 
-; CHECK-LABEL: for.cond:
-; CHECK-NEXT:    %lv = load i32, ptr @a
-; CHECK-NEXT:    %bf.clear = and i32 %lv, -131072
-; CHECK-NEXT:    %bf.set = or i32 1, %bf.clear
-; CHECK-NEXT:    br i1 %bc, label %for.cond, label %exit
 @a = external global i64
 
 define void @fn1(i1 %bc) {
+; CHECK-LABEL: define void @fn1(
+; CHECK-SAME: i1 [[BC:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[LV]], -131072
+; CHECK-NEXT:    [[BF_SET:%.*]] = or i32 1, [[BF_CLEAR]]
+; CHECK-NEXT:    br i1 [[BC]], label [[FOR_COND]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i32 [[BF_SET]], ptr @a, align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %for.cond
 
diff --git a/llvm/test/Transforms/NewGVN/stale-loop-info.ll b/llvm/test/Transforms/NewGVN/stale-loop-info.ll
index 8870824..7abe80b 100644
--- a/llvm/test/Transforms/NewGVN/stale-loop-info.ll
+++ b/llvm/test/Transforms/NewGVN/stale-loop-info.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes='require<loops>,newgvn' -S < %s | FileCheck %s
 
 ; This used to fail with ASAN enabled and if for some reason LoopInfo remained
@@ -14,6 +15,29 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 declare void @snork.1(ptr) local_unnamed_addr #0
 
 define hidden zeroext i1 @eggs(ptr %arg, i1 %arg2) unnamed_addr align 2 {
+; CHECK-LABEL: define hidden zeroext i1 @eggs(
+; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG2:%.*]]) unnamed_addr align 2 {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 [[ARG2]], label [[BB14:%.*]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[STRUCT_WIBBLE_1028:%.*]], ptr [[ARG]], i64 0, i32 2, i32 0, i32 0, i64 0
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br i1 undef, label [[BB11:%.*]], label [[BB8:%.*]]
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
+; CHECK-NEXT:    br label [[BB12:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    br label [[BB12]]
+; CHECK:       bb12:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi ptr [ [[TMP]], [[BB11]] ], [ [[TMP9]], [[BB8]] ]
+; CHECK-NEXT:    call void @snork.1(ptr [[TMP13]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    br label [[BB6]]
+; CHECK:       bb14:
+; CHECK-NEXT:    ret i1 false
+;
 bb:
   br i1 %arg2, label %bb14, label %bb3
 
@@ -29,7 +53,6 @@ bb7:                                              ; preds = %bb6
 
 bb8:                                              ; preds = %bb7
   %tmp9 = load ptr, ptr %tmp, align 8
-; CHECK: %tmp9 = load ptr, ptr %tmp, align 8
   br label %bb12
 
 bb11:                                             ; preds = %bb7
diff --git a/llvm/test/Transforms/NewGVN/tbaa.ll b/llvm/test/Transforms/NewGVN/tbaa.ll
index e6d66dc..335e782 100644
--- a/llvm/test/Transforms/NewGVN/tbaa.ll
+++ b/llvm/test/Transforms/NewGVN/tbaa.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 define i32 @test1(ptr %p, ptr %q) {
-; CHECK-LABEL: @test1(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p)
-; CHECK-NOT: tbaa
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test1(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !0
   %b = call i32 @foo(ptr %p)
   %c = add i32 %a, %b
@@ -12,9 +15,12 @@ define i32 @test1(ptr %p, ptr %q) {
 }
 
 define i32 @test2(ptr %p, ptr %q) {
-; CHECK-LABEL: @test2(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p), !tbaa [[TAGC:!.*]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test2(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !0
   %b = call i32 @foo(ptr %p), !tbaa !0
   %c = add i32 %a, %b
@@ -22,9 +28,12 @@ define i32 @test2(ptr %p, ptr %q) {
 }
 
 define i32 @test3(ptr %p, ptr %q) {
-; CHECK-LABEL: @test3(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p), !tbaa [[TAGB:!.*]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test3(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !3
   %b = call i32 @foo(ptr %p), !tbaa !3
   %c = add i32 %a, %b
@@ -32,9 +41,12 @@ define i32 @test3(ptr %p, ptr %q) {
 }
 
 define i32 @test4(ptr %p, ptr %q) {
-; CHECK-LABEL: @test4(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p), !tbaa [[TAGA:!.*]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test4(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !1
   %b = call i32 @foo(ptr %p), !tbaa !0
   %c = add i32 %a, %b
@@ -42,9 +54,12 @@ define i32 @test4(ptr %p, ptr %q) {
 }
 
 define i32 @test5(ptr %p, ptr %q) {
-; CHECK-LABEL: @test5(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p), !tbaa [[TAGA]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test5(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !0
   %b = call i32 @foo(ptr %p), !tbaa !1
   %c = add i32 %a, %b
@@ -52,9 +67,12 @@ define i32 @test5(ptr %p, ptr %q) {
 }
 
 define i32 @test6(ptr %p, ptr %q) {
-; CHECK-LABEL: @test6(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p), !tbaa [[TAGA]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test6(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !0
   %b = call i32 @foo(ptr %p), !tbaa !3
   %c = add i32 %a, %b
@@ -62,10 +80,12 @@ define i32 @test6(ptr %p, ptr %q) {
 }
 
 define i32 @test7(ptr %p, ptr %q) {
-; CHECK-LABEL: @test7(ptr %p, ptr %q)
-; CHECK: call i32 @foo(ptr %p)
-; CHECK-NOT: tbaa
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test7(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !4
   %b = call i32 @foo(ptr %p), !tbaa !3
   %c = add i32 %a, %b
@@ -73,9 +93,11 @@ define i32 @test7(ptr %p, ptr %q) {
 }
 
 define i32 @test8(ptr %p, ptr %q) {
-; CHECK-LABEL: @test8
-; CHECK-NEXT: store i32 15, ptr %p
-; CHECK-NEXT: ret i32 0
+; CHECK-LABEL: define i32 @test8(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    store i32 15, ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 0
+;
 ; Since we know the location is invariant, we can forward the
 ; load across the potentially aliasing store.
 
@@ -87,9 +109,11 @@ define i32 @test8(ptr %p, ptr %q) {
 }
 
 define i32 @test9(ptr %p, ptr %q) {
-; CHECK-LABEL: @test9
-; CHECK-NEXT: call void @clobber()
-; CHECK-NEXT: ret i32 0
+; CHECK-LABEL: define i32 @test9(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    ret i32 0
+;
 ; Since we know the location is invariant, we can forward the
 ; load across the potentially aliasing store (within the call).
 
@@ -103,9 +127,12 @@ define i32 @test9(ptr %p, ptr %q) {
 define i32 @test10(ptr %p, ptr %q) {
 ; If one access encloses the other, then the merged access is the enclosed one
 ; and not just the common final access type.
-; CHECK-LABEL: @test10
-; CHECK: call i32 @foo(ptr %p), !tbaa [[TAG_X_i:!.*]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test10(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = call i32 @foo(ptr %p), !tbaa !15  ; TAG_X_i
   %b = call i32 @foo(ptr %p), !tbaa !19  ; TAG_Y_x_i
   %c = add i32 %a, %b
@@ -115,12 +142,6 @@ define i32 @test10(ptr %p, ptr %q) {
 declare void @clobber()
 declare i32 @foo(ptr) readonly
 
-; CHECK-DAG: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
-; CHECK-DAG: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]}
-; CHECK-DAG: [[TYPEA]] = !{!"A", !{{.*}}}
-; CHECK-DAG: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0}
-; CHECK-DAG: [[TYPEB]] = !{!"B", [[TYPEA]]}
-; CHECK-DAG: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0}
 !0 = !{!5, !5, i64 0}
 !1 = !{!6, !6, i64 0}
 !2 = !{!"tbaa root"}
@@ -132,9 +153,6 @@ declare i32 @foo(ptr) readonly
 !8 = !{!"another root"}
 !11 = !{!"scalar type", !8}
 
-; CHECK-DAG: [[TAG_X_i]] = !{[[TYPE_X:!.*]], [[TYPE_int:!.*]], i64 0}
-; CHECK-DAG: [[TYPE_X:!.*]] = !{!"struct X", [[TYPE_int]], i64 0}
-; CHECK-DAG: [[TYPE_int]] = !{!"int", {{!.*}}, i64 0}
 !15 = !{!16, !17, i64 0}            ; TAG_X_i
 !16 = !{!"struct X", !17, i64 0}    ; struct X { int i; };
 !17 = !{!"int", !18, i64 0}
@@ -146,3 +164,16 @@ declare i32 @foo(ptr) readonly
 ; A TBAA structure who's only point is to have a constant location.
 !9 = !{!"yet another root"}
 !10 = !{!"node", !9, i64 1}
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"C", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"A", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"tbaa root"}
+; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"B", [[META2]]}
+; CHECK: [[TBAA6]] = !{[[META2]], [[META2]], i64 0}
+; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 0}
+; CHECK: [[META8]] = !{!"struct X", [[META9]], i64 0}
+; CHECK: [[META9]] = !{!"int", [[META10:![0-9]+]], i64 0}
+; CHECK: [[META10]] = !{!"char", [[META3]], i64 0}
+;.
diff --git a/llvm/test/Transforms/NewGVN/unreachable_block_infinite_loop.ll b/llvm/test/Transforms/NewGVN/unreachable_block_infinite_loop.ll
index 7fbf506..70e5e1a 100644
--- a/llvm/test/Transforms/NewGVN/unreachable_block_infinite_loop.ll
+++ b/llvm/test/Transforms/NewGVN/unreachable_block_infinite_loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -5,11 +6,11 @@ target triple = "x86_64-apple-darwin10.0"
 
 define i32 @test2() nounwind ssp {
 entry:
-    ret i32 0
+  ret i32 0
 
 unreachable_block:
-    %a = add i32 %a, 1
-    ret i32 %a
+  %a = add i32 %a, 1
+  ret i32 %a
 }
 
 define i32 @pr23096_test0() {
diff --git a/llvm/test/Transforms/NewGVN/verify-memoryphi.ll b/llvm/test/Transforms/NewGVN/verify-memoryphi.ll
index b863d23..2a1fcf3 100644
--- a/llvm/test/Transforms/NewGVN/verify-memoryphi.ll
+++ b/llvm/test/Transforms/NewGVN/verify-memoryphi.ll
@@ -1,21 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; Skip dead MemoryPhis when performing memory congruency verification
 ; in NewGVN.
 ; RUN: opt -S -passes=newgvn %s | FileCheck %s
 ; REQUIRES: asserts
 
-; CHECK: define void @tinkywinky() {
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   br i1 false, label %body, label %end
-; CHECK:      body:
-; CHECK-NEXT:   store i8 poison, ptr null
-; CHECK-NEXT:   br label %end
-; CHECK:      end:
-; CHECK-NEXT:   ret void
-; CHECK-NEXT: }
 
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 
 define void @tinkywinky() {
+; CHECK-LABEL: define void @tinkywinky() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[BODY:%.*]], label [[END:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @llvm.lifetime.start.p0(i64 4, ptr undef)
   br i1 false, label %body, label %end
diff --git a/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll b/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll
index 2febea7..d6daff9 100644
--- a/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll
+++ b/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll
@@ -1,13 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s
 
 %struct.t = type { ptr }
 
 ; The loaded address and the location of the address itself are not aliased,
 ; so the second reload is not necessary. Check that it can be eliminated.
-; CHECK-LABEL: test1
-; CHECK: load
-; CHECK-NOT: load
 define void @test1(ptr nocapture readonly %p, i32 %v) #0 {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr nocapture readonly [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = load ptr, ptr %p, align 4, !tbaa !1
   store volatile i32 %v, ptr %0, align 4, !tbaa !6
@@ -18,11 +24,16 @@ entry:
 
 ; The store via the loaded address may overwrite the address itself.
 ; Make sure that both loads remain.
-; CHECK-LABEL: test2
-; CHECK: load
-; CHECK: store
-; CHECK: load
 define void @test2(ptr nocapture readonly %p, i32 %v) #0 {
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: ptr nocapture readonly [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = load ptr, ptr %p, align 4, !tbaa !1
   store volatile i32 %v, ptr %0, align 4, !tbaa !1
@@ -33,11 +44,16 @@ entry:
 
 ; The loads are ordered and non-monotonic. Although they are not aliased to
 ; the stores, make sure both are preserved.
-; CHECK-LABEL: test3
-; CHECK: load
-; CHECK: store
-; CHECK: load
 define void @test3(ptr nocapture readonly %p, i32 %v) #0 {
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: ptr nocapture readonly [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[TBAA5]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = load atomic ptr, ptr %p acquire, align 4, !tbaa !1
   store volatile i32 %v, ptr %0, align 4, !tbaa !6
@@ -56,3 +72,12 @@ attributes #0 = { norecurse nounwind }
 !6 = !{!7, !7, i64 0}
 !7 = !{!"int", !4, i64 0}
 
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
+; CHECK: [[META1]] = !{!"", [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
+;.
diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
index b8f5d5d..05d9acd 100644
--- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
@@ -240,3 +240,32 @@ then:
 else:
   ret i16 0
 }
+
+define i1 @test_add_nuw_sub(i32 %a) {
+; CHECK-LABEL: @test_add_nuw_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[A:%.*]], 10000
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[ADD]], -5000
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %add = add nuw i32 %a, 10000
+  %sub = add i32 %add, -5000
+  %cond = icmp ult i32 %sub, 5000
+  ret i1 %cond
+}
+
+define i1 @test_add_nsw_sub(i32 %a) {
+; CHECK-LABEL: @test_add_nsw_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A:%.*]], 10000
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[ADD]], -5000
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[SUB]], 5000
+; CHECK-NEXT:    ret i1 [[COND]]
+;
+entry:
+  %add = add nsw i32 %a, 10000
+  %sub = add i32 %add, -5000
+  %cond = icmp ult i32 %sub, 5000
+  ret i1 %cond
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll
new file mode 100644
index 0000000..a38f4bd
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -mtriple=aarch64 -vector-library=ArmPL -passes=slp-vectorizer | FileCheck %s
+
+@a = common global ptr null, align 8
+
+define void @frem_v2double() {
+; CHECK-LABEL: define void @frem_v2double() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr @a, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @a, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = frem <2 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @a, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a0 = load double, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
+  %a1 = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
+  %b0 = load double, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
+  %b1 = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
+  %r0 = frem double %a0, %b0
+  %r1 = frem double %a1, %b1
+  store double %r0, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
+  store double %r1, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
+  ret void
+}
+
+define void @frem_v4float() {
+; CHECK-LABEL: define void @frem_v4float() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr @a, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @a, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = frem <4 x float> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @a, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a0 = load float, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
+  %a1 = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
+  %a2 = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
+  %a3 = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
+  %b0 = load float, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
+  %b1 = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
+  %b2 = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
+  %b3 = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
+  %r0 = frem float %a0, %b0
+  %r1 = frem float %a1, %b1
+  %r2 = frem float %a2, %b2
+  %r3 = frem float %a3, %b3
+  store float %r0, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
+  store float %r1, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
+  store float %r2, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
+  store float %r3, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
+  ret void
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index ed73f7b..d87bdfe 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -6,7 +6,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i8 [[TMP0]] to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[PIX1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i64> <i64 4, i64 6>
@@ -37,10 +37,10 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
 ; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = zext <2 x i8> [[TMP15]] to <2 x i32>
+; CHECK-NEXT:    [[TMP101:%.*]] = zext <2 x i8> [[TMP15]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP17:%.*]] = load <2 x i8>, ptr [[ADD_PTR64_1]], align 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = zext <2 x i8> [[TMP17]] to <2 x i32>
-; CHECK-NEXT:    [[TMP19:%.*]] = sub <2 x i32> [[TMP16]], [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = sub <2 x i32> [[TMP101]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3_2]], align 1
 ; CHECK-NEXT:    [[TMP21:%.*]] = zext <2 x i8> [[TMP20]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP22:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_2]], align 1
@@ -70,9 +70,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[SUB45_2:%.*]] = sub i32 [[TMP39]], [[TMP40]]
 ; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1
-; CHECK-NEXT:    [[ADD46_2:%.*]] = add i32 [[TMP42]], [[TMP41]]
+; CHECK-NEXT:    [[CONV:%.*]] = add i32 [[TMP42]], [[TMP41]]
 ; CHECK-NEXT:    [[SUB47_2:%.*]] = sub i32 [[TMP41]], [[TMP42]]
-; CHECK-NEXT:    [[ADD48_2:%.*]] = add i32 [[ADD46_2]], [[ADD44_2]]
+; CHECK-NEXT:    [[ADD48_2:%.*]] = add i32 [[CONV]], [[ADD44_2]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = load i8, ptr null, align 1
 ; CHECK-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2
 ; CHECK-NEXT:    [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2
@@ -104,32 +104,32 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]]
 ; CHECK-NEXT:    [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], <i32 16, i32 16>
 ; CHECK-NEXT:    [[TMP71:%.*]] = add <2 x i32> [[TMP70]], [[TMP63]]
-; CHECK-NEXT:    [[TMP72:%.*]] = add <2 x i32> [[TMP71]], [[TMP58]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[TMP71]], [[TMP58]]
 ; CHECK-NEXT:    [[TMP73:%.*]] = sub <2 x i32> [[TMP58]], [[TMP71]]
-; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0
-; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
 ; CHECK-NEXT:    [[ADD48_3:%.*]] = add i32 [[TMP74]], [[TMP75]]
 ; CHECK-NEXT:    [[ADD94:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]]
 ; CHECK-NEXT:    [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]]
-; CHECK-NEXT:    [[TMP76:%.*]] = extractelement <2 x i32> [[TMP47]], i32 1
-; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i32 [[TMP76]], 15
-; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
-; CHECK-NEXT:    [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
-; CHECK-NEXT:    [[SHR_I49:%.*]] = lshr i32 [[ADD46_2]], 15
-; CHECK-NEXT:    [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537
-; CHECK-NEXT:    [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535
-; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
-; CHECK-NEXT:    [[SHR_I49_1:%.*]] = lshr i32 [[TMP77]], 15
-; CHECK-NEXT:    [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537
-; CHECK-NEXT:    [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535
-; CHECK-NEXT:    [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15
+; CHECK-NEXT:    [[TMP79:%.*]] = extractelement <2 x i32> [[TMP47]], i32 1
+; CHECK-NEXT:    [[SHR_I49_2:%.*]] = lshr i32 [[TMP79]], 15
 ; CHECK-NEXT:    [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537
 ; CHECK-NEXT:    [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535
 ; CHECK-NEXT:    [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15
 ; CHECK-NEXT:    [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537
 ; CHECK-NEXT:    [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <2 x i32> [[TMP101]], i32 0
+; CHECK-NEXT:    [[SHR_I49_1:%.*]] = lshr i32 [[TMP107]], 15
+; CHECK-NEXT:    [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537
+; CHECK-NEXT:    [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535
+; CHECK-NEXT:    [[SHR_I49_4:%.*]] = lshr i32 [[CONV_1]], 15
+; CHECK-NEXT:    [[AND_I50_4:%.*]] = and i32 [[SHR_I49_4]], 65537
+; CHECK-NEXT:    [[MUL_I51_4:%.*]] = mul i32 [[AND_I50_4]], 65535
+; CHECK-NEXT:    [[SHR_I49_5:%.*]] = lshr i32 [[CONV1]], 15
+; CHECK-NEXT:    [[AND_I50_5:%.*]] = and i32 [[SHR_I49_5]], 65537
+; CHECK-NEXT:    [[MUL_I51_5:%.*]] = mul i32 [[AND_I50_5]], 65535
 ; CHECK-NEXT:    [[TMP78:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1
-; CHECK-NEXT:    [[TMP79:%.*]] = zext <2 x i8> [[TMP78]] to <2 x i32>
+; CHECK-NEXT:    [[TMP102:%.*]] = zext <2 x i8> [[TMP78]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP80:%.*]] = insertelement <2 x ptr> [[TMP5]], ptr [[ARRAYIDX22]], i32 1
 ; CHECK-NEXT:    [[TMP81:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP80]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
 ; CHECK-NEXT:    [[TMP82:%.*]] = zext <2 x i8> [[TMP81]] to <2 x i32>
@@ -147,20 +147,20 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP95:%.*]] = sub <2 x i32> [[TMP92]], [[TMP94]]
 ; CHECK-NEXT:    [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], <i32 16, i32 16>
-; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <2 x i32> [[TMP79]], i32 [[CONV33]], i32 1
+; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1
 ; CHECK-NEXT:    [[TMP98:%.*]] = sub <2 x i32> [[TMP97]], [[TMP90]]
-; CHECK-NEXT:    [[TMP99:%.*]] = add <2 x i32> [[TMP96]], [[TMP98]]
-; CHECK-NEXT:    [[TMP100:%.*]] = insertelement <2 x i32> [[TMP79]], i32 [[CONV]], i32 0
-; CHECK-NEXT:    [[TMP101:%.*]] = sub <2 x i32> [[TMP100]], [[TMP82]]
-; CHECK-NEXT:    [[TMP102:%.*]] = add <2 x i32> [[TMP88]], [[TMP101]]
-; CHECK-NEXT:    [[TMP103:%.*]] = shufflevector <2 x i32> [[TMP99]], <2 x i32> [[TMP102]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP104:%.*]] = add <2 x i32> [[TMP99]], [[TMP102]]
-; CHECK-NEXT:    [[TMP105:%.*]] = sub <2 x i32> [[TMP102]], [[TMP99]]
-; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <2 x i32> [[TMP104]], i32 0
-; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <2 x i32> [[TMP104]], i32 1
-; CHECK-NEXT:    [[ADD48:%.*]] = add i32 [[TMP107]], [[TMP106]]
+; CHECK-NEXT:    [[TMP104:%.*]] = add <2 x i32> [[TMP96]], [[TMP98]]
+; CHECK-NEXT:    [[TMP100:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0
+; CHECK-NEXT:    [[TMP103:%.*]] = sub <2 x i32> [[TMP100]], [[TMP82]]
+; CHECK-NEXT:    [[TMP200:%.*]] = add <2 x i32> [[TMP88]], [[TMP103]]
+; CHECK-NEXT:    [[TMP128:%.*]] = shufflevector <2 x i32> [[TMP104]], <2 x i32> [[TMP200]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP165:%.*]] = add <2 x i32> [[TMP104]], [[TMP200]]
+; CHECK-NEXT:    [[TMP105:%.*]] = sub <2 x i32> [[TMP200]], [[TMP104]]
+; CHECK-NEXT:    [[TMP238:%.*]] = extractelement <2 x i32> [[TMP165]], i32 0
+; CHECK-NEXT:    [[TMP143:%.*]] = extractelement <2 x i32> [[TMP165]], i32 1
+; CHECK-NEXT:    [[ADD48:%.*]] = add i32 [[TMP143]], [[TMP238]]
 ; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <2 x i32> [[TMP105]], i32 1
-; CHECK-NEXT:    [[SHR_I59:%.*]] = lshr i32 [[TMP107]], 15
+; CHECK-NEXT:    [[SHR_I59:%.*]] = lshr i32 [[TMP143]], 15
 ; CHECK-NEXT:    [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537
 ; CHECK-NEXT:    [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535
 ; CHECK-NEXT:    [[SHR_I59_1:%.*]] = lshr i32 [[TMP108]], 15
@@ -185,7 +185,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], <i32 16, i32 16>
 ; CHECK-NEXT:    [[TMP126:%.*]] = getelementptr i8, <2 x ptr> [[TMP120]], <2 x i64> <i64 1, i64 3>
 ; CHECK-NEXT:    [[TMP127:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP126]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT:    [[TMP128:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32>
+; CHECK-NEXT:    [[TMP153:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP129:%.*]] = getelementptr i8, <2 x ptr> [[TMP115]], <2 x i64> <i64 5, i64 7>
 ; CHECK-NEXT:    [[TMP130:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP129]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
 ; CHECK-NEXT:    [[TMP131:%.*]] = zext <2 x i8> [[TMP130]] to <2 x i32>
@@ -195,15 +195,15 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP135:%.*]] = sub <2 x i32> [[TMP131]], [[TMP134]]
 ; CHECK-NEXT:    [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], <i32 16, i32 16>
 ; CHECK-NEXT:    [[TMP137:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV33_1]], i32 1
-; CHECK-NEXT:    [[TMP138:%.*]] = sub <2 x i32> [[TMP137]], [[TMP128]]
+; CHECK-NEXT:    [[TMP138:%.*]] = sub <2 x i32> [[TMP137]], [[TMP153]]
 ; CHECK-NEXT:    [[TMP139:%.*]] = add <2 x i32> [[TMP136]], [[TMP138]]
 ; CHECK-NEXT:    [[TMP140:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV_1]], i32 0
 ; CHECK-NEXT:    [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP113]]
 ; CHECK-NEXT:    [[TMP142:%.*]] = add <2 x i32> [[TMP125]], [[TMP141]]
-; CHECK-NEXT:    [[TMP143:%.*]] = add <2 x i32> [[TMP139]], [[TMP142]]
+; CHECK-NEXT:    [[TMP257:%.*]] = add <2 x i32> [[TMP139]], [[TMP142]]
 ; CHECK-NEXT:    [[TMP144:%.*]] = sub <2 x i32> [[TMP142]], [[TMP139]]
-; CHECK-NEXT:    [[TMP145:%.*]] = extractelement <2 x i32> [[TMP143]], i32 0
-; CHECK-NEXT:    [[TMP146:%.*]] = extractelement <2 x i32> [[TMP143]], i32 1
+; CHECK-NEXT:    [[TMP145:%.*]] = extractelement <2 x i32> [[TMP257]], i32 0
+; CHECK-NEXT:    [[TMP146:%.*]] = extractelement <2 x i32> [[TMP257]], i32 1
 ; CHECK-NEXT:    [[ADD48_1:%.*]] = add i32 [[TMP146]], [[TMP145]]
 ; CHECK-NEXT:    [[SHR_I54:%.*]] = lshr i32 [[TMP146]], 15
 ; CHECK-NEXT:    [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537
@@ -217,51 +217,51 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]]
 ; CHECK-NEXT:    [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]]
 ; CHECK-NEXT:    [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]]
-; CHECK-NEXT:    [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]]
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP76]]
-; CHECK-NEXT:    [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]]
-; CHECK-NEXT:    [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[ADD46_2]]
+; CHECK-NEXT:    [[ADD_I:%.*]] = add i32 [[MUL_I51_2]], [[ADD103]]
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP79]]
+; CHECK-NEXT:    [[ADD_I52:%.*]] = add i32 [[MUL_I51_3]], [[ADD105]]
+; CHECK-NEXT:    [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV]]
 ; CHECK-NEXT:    [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]]
 ; CHECK-NEXT:    [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP146]]
 ; CHECK-NEXT:    [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]]
-; CHECK-NEXT:    [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP107]]
+; CHECK-NEXT:    [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP143]]
 ; CHECK-NEXT:    [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]]
 ; CHECK-NEXT:    [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]]
 ; CHECK-NEXT:    [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]]
 ; CHECK-NEXT:    [[TMP150:%.*]] = shufflevector <2 x i32> [[TMP105]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP151:%.*]] = insertelement <2 x i32> [[TMP150]], i32 [[SUB47_2]], i32 1
 ; CHECK-NEXT:    [[TMP152:%.*]] = insertelement <2 x i32> [[TMP105]], i32 [[SUB45_2]], i32 1
-; CHECK-NEXT:    [[TMP153:%.*]] = add <2 x i32> [[TMP151]], [[TMP152]]
+; CHECK-NEXT:    [[TMP163:%.*]] = add <2 x i32> [[TMP151]], [[TMP152]]
 ; CHECK-NEXT:    [[TMP154:%.*]] = shufflevector <2 x i32> [[TMP144]], <2 x i32> [[TMP73]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP155:%.*]] = shufflevector <2 x i32> [[TMP144]], <2 x i32> [[TMP73]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP156:%.*]] = add <2 x i32> [[TMP154]], [[TMP155]]
-; CHECK-NEXT:    [[TMP157:%.*]] = extractelement <2 x i32> [[TMP153]], i32 1
+; CHECK-NEXT:    [[TMP157:%.*]] = extractelement <2 x i32> [[TMP163]], i32 1
 ; CHECK-NEXT:    [[TMP158:%.*]] = extractelement <2 x i32> [[TMP156]], i32 1
-; CHECK-NEXT:    [[TMP159:%.*]] = shufflevector <2 x i32> [[TMP156]], <2 x i32> [[TMP153]], <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[ADD94_1:%.*]] = add i32 [[TMP158]], [[TMP157]]
-; CHECK-NEXT:    [[TMP160:%.*]] = extractelement <2 x i32> [[TMP153]], i32 0
+; CHECK-NEXT:    [[TMP159:%.*]] = shufflevector <2 x i32> [[TMP156]], <2 x i32> [[TMP163]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[ADD78_2:%.*]] = add i32 [[TMP158]], [[TMP157]]
+; CHECK-NEXT:    [[TMP160:%.*]] = extractelement <2 x i32> [[TMP163]], i32 0
 ; CHECK-NEXT:    [[TMP161:%.*]] = extractelement <2 x i32> [[TMP156]], i32 0
-; CHECK-NEXT:    [[TMP162:%.*]] = shufflevector <2 x i32> [[TMP156]], <2 x i32> [[TMP153]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[ADD78_1:%.*]] = add i32 [[TMP161]], [[TMP160]]
-; CHECK-NEXT:    [[TMP163:%.*]] = sub <2 x i32> [[TMP153]], [[TMP156]]
-; CHECK-NEXT:    [[TMP164:%.*]] = extractelement <2 x i32> [[TMP163]], i32 0
-; CHECK-NEXT:    [[TMP165:%.*]] = extractelement <2 x i32> [[TMP163]], i32 1
-; CHECK-NEXT:    [[ADD105_1:%.*]] = add i32 [[TMP165]], [[TMP164]]
-; CHECK-NEXT:    [[SUB106_1:%.*]] = sub i32 [[TMP164]], [[TMP165]]
+; CHECK-NEXT:    [[TMP162:%.*]] = shufflevector <2 x i32> [[TMP156]], <2 x i32> [[TMP163]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[ADD94_1:%.*]] = add i32 [[TMP161]], [[TMP160]]
+; CHECK-NEXT:    [[TMP164:%.*]] = sub <2 x i32> [[TMP163]], [[TMP156]]
+; CHECK-NEXT:    [[TMP173:%.*]] = extractelement <2 x i32> [[TMP164]], i32 0
+; CHECK-NEXT:    [[TMP174:%.*]] = extractelement <2 x i32> [[TMP164]], i32 1
+; CHECK-NEXT:    [[ADD105_1:%.*]] = add i32 [[TMP174]], [[TMP173]]
+; CHECK-NEXT:    [[SUB106_1:%.*]] = sub i32 [[TMP173]], [[TMP174]]
 ; CHECK-NEXT:    [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]]
-; CHECK-NEXT:    [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP77]]
-; CHECK-NEXT:    [[TMP166:%.*]] = shufflevector <2 x i32> [[TMP16]], <2 x i32> [[TMP144]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP107]]
+; CHECK-NEXT:    [[TMP166:%.*]] = shufflevector <2 x i32> [[TMP101]], <2 x i32> [[TMP144]], <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP167:%.*]] = lshr <2 x i32> [[TMP166]], <i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP168:%.*]] = and <2 x i32> [[TMP167]], <i32 65537, i32 65537>
 ; CHECK-NEXT:    [[TMP169:%.*]] = mul <2 x i32> [[TMP168]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP170:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_1]], i32 0
-; CHECK-NEXT:    [[TMP171:%.*]] = shufflevector <2 x i32> [[TMP170]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP172:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_1]], i32 0
-; CHECK-NEXT:    [[TMP173:%.*]] = shufflevector <2 x i32> [[TMP172]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP174:%.*]] = add <2 x i32> [[TMP171]], [[TMP173]]
-; CHECK-NEXT:    [[TMP175:%.*]] = sub <2 x i32> [[TMP171]], [[TMP173]]
-; CHECK-NEXT:    [[TMP176:%.*]] = shufflevector <2 x i32> [[TMP174]], <2 x i32> [[TMP175]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP177:%.*]] = add <2 x i32> [[TMP169]], [[TMP176]]
+; CHECK-NEXT:    [[TMP171:%.*]] = shufflevector <2 x i32> [[TMP172]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP208:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0
+; CHECK-NEXT:    [[TMP209:%.*]] = shufflevector <2 x i32> [[TMP208]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP282:%.*]] = add <2 x i32> [[TMP171]], [[TMP209]]
+; CHECK-NEXT:    [[TMP211:%.*]] = sub <2 x i32> [[TMP171]], [[TMP209]]
+; CHECK-NEXT:    [[TMP283:%.*]] = shufflevector <2 x i32> [[TMP282]], <2 x i32> [[TMP211]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP177:%.*]] = add <2 x i32> [[TMP169]], [[TMP283]]
 ; CHECK-NEXT:    [[TMP178:%.*]] = xor <2 x i32> [[TMP177]], [[TMP166]]
 ; CHECK-NEXT:    [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]]
 ; CHECK-NEXT:    [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP108]]
@@ -271,90 +271,90 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP180:%.*]] = extractelement <2 x i32> [[TMP178]], i32 1
 ; CHECK-NEXT:    [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[TMP180]]
 ; CHECK-NEXT:    [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]]
-; CHECK-NEXT:    [[TMP181:%.*]] = shufflevector <2 x i32> [[TMP104]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP181:%.*]] = shufflevector <2 x i32> [[TMP165]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
 ; CHECK-NEXT:    [[TMP182:%.*]] = insertelement <2 x i32> [[TMP181]], i32 [[ADD44_2]], i32 0
-; CHECK-NEXT:    [[TMP183:%.*]] = insertelement <2 x i32> [[TMP104]], i32 [[ADD46_2]], i32 0
+; CHECK-NEXT:    [[TMP183:%.*]] = insertelement <2 x i32> [[TMP165]], i32 [[CONV]], i32 0
 ; CHECK-NEXT:    [[TMP184:%.*]] = sub <2 x i32> [[TMP182]], [[TMP183]]
-; CHECK-NEXT:    [[TMP185:%.*]] = shufflevector <2 x i32> [[TMP72]], <2 x i32> [[TMP143]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP186:%.*]] = shufflevector <2 x i32> [[TMP72]], <2 x i32> [[TMP143]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP185:%.*]] = shufflevector <2 x i32> [[TMP16]], <2 x i32> [[TMP257]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP186:%.*]] = shufflevector <2 x i32> [[TMP16]], <2 x i32> [[TMP257]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP187:%.*]] = sub <2 x i32> [[TMP185]], [[TMP186]]
 ; CHECK-NEXT:    [[TMP188:%.*]] = extractelement <2 x i32> [[TMP184]], i32 0
 ; CHECK-NEXT:    [[TMP189:%.*]] = extractelement <2 x i32> [[TMP187]], i32 0
 ; CHECK-NEXT:    [[TMP190:%.*]] = shufflevector <2 x i32> [[TMP187]], <2 x i32> [[TMP184]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[ADD94_2:%.*]] = add i32 [[TMP189]], [[TMP188]]
+; CHECK-NEXT:    [[ADD94_4:%.*]] = add i32 [[TMP189]], [[TMP188]]
 ; CHECK-NEXT:    [[TMP191:%.*]] = extractelement <2 x i32> [[TMP184]], i32 1
 ; CHECK-NEXT:    [[TMP192:%.*]] = extractelement <2 x i32> [[TMP187]], i32 1
 ; CHECK-NEXT:    [[TMP193:%.*]] = shufflevector <2 x i32> [[TMP187]], <2 x i32> [[TMP184]], <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[ADD78_2:%.*]] = add i32 [[TMP192]], [[TMP191]]
+; CHECK-NEXT:    [[ADD94_2:%.*]] = add i32 [[TMP192]], [[TMP191]]
 ; CHECK-NEXT:    [[TMP194:%.*]] = sub <2 x i32> [[TMP184]], [[TMP187]]
-; CHECK-NEXT:    [[TMP195:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0
-; CHECK-NEXT:    [[TMP196:%.*]] = shufflevector <2 x i32> [[TMP195]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP197:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0
+; CHECK-NEXT:    [[TMP244:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0
+; CHECK-NEXT:    [[TMP245:%.*]] = shufflevector <2 x i32> [[TMP244]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP197:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0
 ; CHECK-NEXT:    [[TMP198:%.*]] = shufflevector <2 x i32> [[TMP197]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP199:%.*]] = add <2 x i32> [[TMP196]], [[TMP198]]
-; CHECK-NEXT:    [[TMP200:%.*]] = sub <2 x i32> [[TMP196]], [[TMP198]]
-; CHECK-NEXT:    [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> [[TMP200]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP202:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0
+; CHECK-NEXT:    [[TMP246:%.*]] = add <2 x i32> [[TMP245]], [[TMP198]]
+; CHECK-NEXT:    [[TMP247:%.*]] = sub <2 x i32> [[TMP245]], [[TMP198]]
+; CHECK-NEXT:    [[TMP248:%.*]] = shufflevector <2 x i32> [[TMP246]], <2 x i32> [[TMP247]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP215:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0
 ; CHECK-NEXT:    [[TMP203:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1
-; CHECK-NEXT:    [[ADD105_2:%.*]] = add i32 [[TMP202]], [[TMP203]]
-; CHECK-NEXT:    [[SUB106_2:%.*]] = sub i32 [[TMP203]], [[TMP202]]
-; CHECK-NEXT:    [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]]
+; CHECK-NEXT:    [[ADD105_2:%.*]] = add i32 [[TMP215]], [[TMP203]]
+; CHECK-NEXT:    [[SUB106_2:%.*]] = sub i32 [[TMP203]], [[TMP215]]
+; CHECK-NEXT:    [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_2]]
 ; CHECK-NEXT:    [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]]
-; CHECK-NEXT:    [[TMP204:%.*]] = add <2 x i32> [[TMP149]], [[TMP201]]
-; CHECK-NEXT:    [[TMP205:%.*]] = xor <2 x i32> [[TMP204]], [[TMP110]]
-; CHECK-NEXT:    [[SHR_I59_2:%.*]] = lshr i32 [[TMP106]], 15
+; CHECK-NEXT:    [[TMP266:%.*]] = add <2 x i32> [[TMP149]], [[TMP248]]
+; CHECK-NEXT:    [[TMP267:%.*]] = xor <2 x i32> [[TMP266]], [[TMP110]]
+; CHECK-NEXT:    [[SHR_I59_2:%.*]] = lshr i32 [[TMP238]], 15
 ; CHECK-NEXT:    [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537
 ; CHECK-NEXT:    [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535
 ; CHECK-NEXT:    [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]]
-; CHECK-NEXT:    [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP106]]
+; CHECK-NEXT:    [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP238]]
 ; CHECK-NEXT:    [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]]
-; CHECK-NEXT:    [[TMP206:%.*]] = extractelement <2 x i32> [[TMP205]], i32 0
+; CHECK-NEXT:    [[TMP206:%.*]] = extractelement <2 x i32> [[TMP267]], i32 0
 ; CHECK-NEXT:    [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP206]]
-; CHECK-NEXT:    [[TMP207:%.*]] = extractelement <2 x i32> [[TMP205]], i32 1
+; CHECK-NEXT:    [[TMP207:%.*]] = extractelement <2 x i32> [[TMP267]], i32 1
 ; CHECK-NEXT:    [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP207]]
 ; CHECK-NEXT:    [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]]
-; CHECK-NEXT:    [[TMP208:%.*]] = insertelement <2 x i32> [[TMP150]], i32 [[SUB45_2]], i32 0
-; CHECK-NEXT:    [[TMP209:%.*]] = insertelement <2 x i32> [[TMP105]], i32 [[SUB47_2]], i32 0
-; CHECK-NEXT:    [[TMP210:%.*]] = sub <2 x i32> [[TMP208]], [[TMP209]]
-; CHECK-NEXT:    [[TMP211:%.*]] = shufflevector <2 x i32> [[TMP73]], <2 x i32> [[TMP144]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP221:%.*]] = insertelement <2 x i32> [[TMP150]], i32 [[SUB45_2]], i32 0
+; CHECK-NEXT:    [[TMP222:%.*]] = insertelement <2 x i32> [[TMP105]], i32 [[SUB47_2]], i32 0
+; CHECK-NEXT:    [[TMP210:%.*]] = sub <2 x i32> [[TMP221]], [[TMP222]]
+; CHECK-NEXT:    [[TMP225:%.*]] = shufflevector <2 x i32> [[TMP73]], <2 x i32> [[TMP144]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP212:%.*]] = shufflevector <2 x i32> [[TMP73]], <2 x i32> [[TMP144]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP213:%.*]] = sub <2 x i32> [[TMP211]], [[TMP212]]
+; CHECK-NEXT:    [[TMP226:%.*]] = sub <2 x i32> [[TMP225]], [[TMP212]]
 ; CHECK-NEXT:    [[TMP214:%.*]] = extractelement <2 x i32> [[TMP210]], i32 0
-; CHECK-NEXT:    [[TMP215:%.*]] = extractelement <2 x i32> [[TMP213]], i32 0
-; CHECK-NEXT:    [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> [[TMP210]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[ADD94_3:%.*]] = add i32 [[TMP215]], [[TMP214]]
+; CHECK-NEXT:    [[TMP227:%.*]] = extractelement <2 x i32> [[TMP226]], i32 0
+; CHECK-NEXT:    [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP226]], <2 x i32> [[TMP210]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[ADD94_3:%.*]] = add i32 [[TMP227]], [[TMP214]]
 ; CHECK-NEXT:    [[TMP217:%.*]] = extractelement <2 x i32> [[TMP210]], i32 1
-; CHECK-NEXT:    [[TMP218:%.*]] = extractelement <2 x i32> [[TMP213]], i32 1
-; CHECK-NEXT:    [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> [[TMP210]], <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[ADD78_3:%.*]] = add i32 [[TMP218]], [[TMP217]]
-; CHECK-NEXT:    [[TMP220:%.*]] = sub <2 x i32> [[TMP210]], [[TMP213]]
-; CHECK-NEXT:    [[TMP221:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0
-; CHECK-NEXT:    [[TMP222:%.*]] = shufflevector <2 x i32> [[TMP221]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP218:%.*]] = extractelement <2 x i32> [[TMP226]], i32 1
+; CHECK-NEXT:    [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP226]], <2 x i32> [[TMP210]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[SUB59:%.*]] = add i32 [[TMP218]], [[TMP217]]
+; CHECK-NEXT:    [[TMP220:%.*]] = sub <2 x i32> [[TMP210]], [[TMP226]]
+; CHECK-NEXT:    [[TMP274:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59]], i32 0
+; CHECK-NEXT:    [[TMP275:%.*]] = shufflevector <2 x i32> [[TMP274]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP223:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0
 ; CHECK-NEXT:    [[TMP224:%.*]] = shufflevector <2 x i32> [[TMP223]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP225:%.*]] = add <2 x i32> [[TMP222]], [[TMP224]]
-; CHECK-NEXT:    [[TMP226:%.*]] = sub <2 x i32> [[TMP222]], [[TMP224]]
-; CHECK-NEXT:    [[TMP227:%.*]] = shufflevector <2 x i32> [[TMP225]], <2 x i32> [[TMP226]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP276:%.*]] = add <2 x i32> [[TMP275]], [[TMP224]]
+; CHECK-NEXT:    [[TMP277:%.*]] = sub <2 x i32> [[TMP275]], [[TMP224]]
+; CHECK-NEXT:    [[TMP278:%.*]] = shufflevector <2 x i32> [[TMP276]], <2 x i32> [[TMP277]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP228:%.*]] = extractelement <2 x i32> [[TMP220]], i32 0
 ; CHECK-NEXT:    [[TMP229:%.*]] = extractelement <2 x i32> [[TMP220]], i32 1
 ; CHECK-NEXT:    [[ADD105_3:%.*]] = add i32 [[TMP228]], [[TMP229]]
 ; CHECK-NEXT:    [[SUB106_3:%.*]] = sub i32 [[TMP229]], [[TMP228]]
-; CHECK-NEXT:    [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]]
-; CHECK-NEXT:    [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]]
-; CHECK-NEXT:    [[TMP230:%.*]] = lshr <2 x i32> [[TMP79]], <i32 15, i32 15>
+; CHECK-NEXT:    [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_5]], [[ADD105_3]]
+; CHECK-NEXT:    [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV1]]
+; CHECK-NEXT:    [[TMP230:%.*]] = lshr <2 x i32> [[TMP102]], <i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP231:%.*]] = and <2 x i32> [[TMP230]], <i32 65537, i32 65537>
 ; CHECK-NEXT:    [[TMP232:%.*]] = mul <2 x i32> [[TMP231]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP233:%.*]] = add <2 x i32> [[TMP232]], [[TMP227]]
-; CHECK-NEXT:    [[TMP234:%.*]] = xor <2 x i32> [[TMP233]], [[TMP79]]
+; CHECK-NEXT:    [[TMP286:%.*]] = add <2 x i32> [[TMP232]], [[TMP278]]
+; CHECK-NEXT:    [[TMP287:%.*]] = xor <2 x i32> [[TMP286]], [[TMP102]]
 ; CHECK-NEXT:    [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15
 ; CHECK-NEXT:    [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537
 ; CHECK-NEXT:    [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535
 ; CHECK-NEXT:    [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]]
 ; CHECK-NEXT:    [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]]
 ; CHECK-NEXT:    [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]]
-; CHECK-NEXT:    [[TMP235:%.*]] = extractelement <2 x i32> [[TMP234]], i32 0
+; CHECK-NEXT:    [[TMP235:%.*]] = extractelement <2 x i32> [[TMP287]], i32 0
 ; CHECK-NEXT:    [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP235]]
-; CHECK-NEXT:    [[TMP236:%.*]] = extractelement <2 x i32> [[TMP234]], i32 1
+; CHECK-NEXT:    [[TMP236:%.*]] = extractelement <2 x i32> [[TMP287]], i32 1
 ; CHECK-NEXT:    [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP236]]
 ; CHECK-NEXT:    [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]]
 ; CHECK-NEXT:    ret i32 [[ADD113_3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
index 12b227c..a153c3c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -25,8 +25,8 @@ define i32 @fn1() {
 ; CHECK-NEXT:    store i32 [[AND]], ptr @a, align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    switch i32 [[AND]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 7, label [[SAVE_STATE_AND_RETURN]]
-; CHECK-NEXT:    i32 0, label [[SAVE_STATE_AND_RETURN]]
+; CHECK-NEXT:      i32 7, label [[SAVE_STATE_AND_RETURN]]
+; CHECK-NEXT:      i32 0, label [[SAVE_STATE_AND_RETURN]]
 ; CHECK-NEXT:    ]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    br label [[SAVE_STATE_AND_RETURN]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll
new file mode 100644
index 0000000..759cb65
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux -mcpu=cascadelake < %s | FileCheck %s
+
+define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) {
+; CHECK-LABEL: define i1 @test(
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], i64 [[TMP1:%.*]], ptr noalias [[P:%.*]], ptr [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[BF_LOAD_I1336:%.*]] = load i24, ptr [[TMP2]], align 16
+; CHECK-NEXT:    [[AND_I_I_I1342:%.*]] = and i64 [[TMP1]], -16
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[AND_I_I_I1342]] to ptr
+; CHECK-NEXT:    store ptr [[TMP3]], ptr [[P]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
+; CHECK-NEXT:    [[BF_LOAD_I1345:%.*]] = load i24, ptr [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i24> poison, i24 [[BF_LOAD_I1336]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i24> [[TMP6]], i24 [[BF_LOAD_I1345]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = and <2 x i24> [[TMP7]], <i24 255, i24 255>
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], <i24 24, i24 24>
+; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> <i24 23, i24 23>, <2 x i24> [[TMP8]]
+; CHECK-NEXT:    [[TMP11:%.*]] = zext <2 x i24> [[TMP10]] to <2 x i32>
+; CHECK-NEXT:    [[TMP12:%.*]] = and <2 x i32> [[TMP11]], <i32 254, i32 254>
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], <i32 4, i32 4>
+; CHECK-NEXT:    [[TMP14:%.*]] = select <2 x i1> [[TMP13]], <2 x i32> <i32 2, i32 2>, <2 x i32> [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP14]], <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> <i32 31, i32 31>, <2 x i32> [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP16]], <i32 54, i32 54>
+; CHECK-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP17]], <2 x i32> <i32 53, i32 53>, <2 x i32> [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0
+; CHECK-NEXT:    store i32 [[TMP19]], ptr [[P1]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP18]], i32 1
+; CHECK-NEXT:    [[CMP210_NOT:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT:    ret i1 [[CMP210_NOT]]
+;
+newFuncRoot:
+  %2 = getelementptr inbounds i8, ptr %0, i64 16
+  %bf.load.i1336 = load i24, ptr %2, align 16
+  %bf.clear.i1337 = and i24 %bf.load.i1336, 255
+  %and.i.i.i1342 = and i64 %1, -16
+  %3 = inttoptr i64 %and.i.i.i1342 to ptr
+  store ptr %3, ptr %p, align 8
+  %4 = load ptr, ptr %3, align 16
+  %5 = getelementptr inbounds i8, ptr %4, i64 16
+  %bf.load.i1345 = load i24, ptr %5, align 16
+  %bf.clear.i1346 = and i24 %bf.load.i1345, 255
+  %cmp182 = icmp eq i24 %bf.clear.i1337, 24
+  %narrow = select i1 %cmp182, i24 23, i24 %bf.clear.i1337
+  %s = zext nneg i24 %narrow to i32
+  %cmp185 = icmp eq i24 %bf.clear.i1346, 24
+  %narrow1790 = select i1 %cmp185, i24 23, i24 %bf.clear.i1346
+  %s1139 = zext nneg i24 %narrow1790 to i32
+  %6 = and i32 %s, 254
+  %or.cond1132 = icmp eq i32 %6, 4
+  %s1142 = select i1 %or.cond1132, i32 2, i32 %s
+  %7 = and i32 %s1139, 254
+  %or.cond1133 = icmp eq i32 %7, 4
+  %s1140 = select i1 %or.cond1133, i32 2, i32 %s1139
+  %cmp198 = icmp eq i32 %s1142, 32
+  %s1134 = select i1 %cmp198, i32 31, i32 %s1142
+  %cmp201 = icmp eq i32 %s1140, 32
+  %s1143 = select i1 %cmp201, i32 31, i32 %s1140
+  %cmp204 = icmp eq i32 %s1134, 54
+  %s1135 = select i1 %cmp204, i32 53, i32 %s1134
+  store i32 %s1135, ptr %p1, align 4
+  %cmp207 = icmp eq i32 %s1143, 54
+  %s1141 = select i1 %cmp207, i32 53, i32 %s1143
+  %cmp210.not = icmp eq i32 %s1135, %s1141
+  ret i1 %cmp210.not
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll
index 6f5d3d3..f6db831 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll
@@ -108,3 +108,111 @@ entry:
   store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
   ret void
 }
+
+define void @test_div() {
+; CHECK-LABEL: define void @test_div(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[TMP4]], <i64 1, i64 2, i64 1, i64 2>
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx1 = getelementptr i32, ptr null, i64 1
+  %0 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr i32, ptr null, i64 63
+  %1 = load i32, ptr %arrayidx2, align 4
+  %mul = mul i32 %1, %0
+  %conv = zext i32 %mul to i64
+  %shr = udiv i64 %conv, 1
+  %conv3 = trunc i64 %shr to i32
+  store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
+  %arrayidx5 = getelementptr i32, ptr null, i64 33
+  %2 = load i32, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr i32, ptr null, i64 62
+  %3 = load i32, ptr %arrayidx6, align 4
+  %mul7 = mul i32 %3, %2
+  %conv8 = zext i32 %mul7 to i64
+  %shr10 = udiv i64 %conv8, 2
+  %conv11 = trunc i64 %shr10 to i32
+  store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
+  %arrayidx13 = getelementptr i32, ptr null, i64 7
+  %4 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr i32, ptr null, i64 61
+  %5 = load i32, ptr %arrayidx14, align 4
+  %mul15 = mul i32 %5, %4
+  %conv16 = zext i32 %mul15 to i64
+  %shr18 = udiv i64 %conv16, 1
+  %conv19 = trunc i64 %shr18 to i32
+  store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
+  %6 = load i32, ptr null, align 4
+  %arrayidx22 = getelementptr i32, ptr null, i64 60
+  %7 = load i32, ptr %arrayidx22, align 4
+  %mul23 = mul i32 %7, %6
+  %conv24 = zext i32 %mul23 to i64
+  %shr26 = udiv i64 %conv24, 2
+  %conv27 = trunc i64 %shr26 to i32
+  store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
+  ret void
+}
+
+define void @test_rem() {
+; CHECK-LABEL: define void @test_rem(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = urem <4 x i64> [[TMP4]], <i64 1, i64 2, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx1 = getelementptr i32, ptr null, i64 1
+  %0 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr i32, ptr null, i64 63
+  %1 = load i32, ptr %arrayidx2, align 4
+  %mul = mul i32 %1, %0
+  %conv = zext i32 %mul to i64
+  %shr = urem i64 %conv, 1
+  %conv3 = trunc i64 %shr to i32
+  store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
+  %arrayidx5 = getelementptr i32, ptr null, i64 33
+  %2 = load i32, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr i32, ptr null, i64 62
+  %3 = load i32, ptr %arrayidx6, align 4
+  %mul7 = mul i32 %3, %2
+  %conv8 = zext i32 %mul7 to i64
+  %shr10 = urem i64 %conv8, 2
+  %conv11 = trunc i64 %shr10 to i32
+  store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
+  %arrayidx13 = getelementptr i32, ptr null, i64 7
+  %4 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr i32, ptr null, i64 61
+  %5 = load i32, ptr %arrayidx14, align 4
+  %mul15 = mul i32 %5, %4
+  %conv16 = zext i32 %mul15 to i64
+  %shr18 = urem i64 %conv16, 1
+  %conv19 = trunc i64 %shr18 to i32
+  store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
+  %6 = load i32, ptr null, align 4
+  %arrayidx22 = getelementptr i32, ptr null, i64 60
+  %7 = load i32, ptr %arrayidx22, align 4
+  %mul23 = mul i32 %7, %6
+  %conv24 = zext i32 %mul23 to i64
+  %shr26 = urem i64 %conv24, 1
+  %conv27 = trunc i64 %shr26 to i32
+  store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
index 65229d8..f0e734d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
@@ -10,29 +10,29 @@ define  void @foo (ptr %A, ptr %B, ptr %Result) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP20:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], ptr [[A:%.*]], i64 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], ptr [[B:%.*]], i64 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], ptr [[B]], i64 [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x float> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP13]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[SHUFFLE2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x float> [[TMP11]], [[SHUFFLE2]]
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP17]] = fadd <2 x float> [[TMP2]], [[TMP16]]
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = fmul <2 x float> [[TMP8]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP16:%.*]] = fsub <2 x float> [[TMP11]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = fadd <2 x float> [[TMP11]], [[TMP15]]
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> [[TMP17]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP20]] = fadd <2 x float> [[TMP2]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP18]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    store <2 x float> [[TMP17]], ptr [[RESULT:%.*]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP20]], ptr [[RESULT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 22cd408..e8395fe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -179,11 +179,11 @@ define void @addsub0(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[INCDEC_PTR3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -212,11 +212,11 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
@@ -531,11 +531,11 @@ define void @addsub0f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
 ; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[INCDEC_PTR3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -564,11 +564,11 @@ define void @addsub1f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <2 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x float> [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[DST]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-compare-logical-elements.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-compare-logical-elements.test
new file mode 100644
index 0000000..f52c9c7
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-compare-logical-elements.test
@@ -0,0 +1,106 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 1 - General options
+
+; test.cpp
+;  1  using INTPTR = const int *;
+;  2  int foo(INTPTR ParamPtr, unsigned ParamUnsigned, bool ParamBool) {
+;  3    if (ParamBool) {
+;  4      typedef int INTEGER;
+;  5      const INTEGER CONSTANT = 7;
+;  6      return CONSTANT;
+;  7    }
+;  8    return ParamUnsigned;
+;  9  }
+
+; Compare mode - Logical view.
+; The output shows in view form the 'missing (-), added (+)' elements,
+; giving more context by swapping the reference and target object files.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/test-clang.s -o %t.test-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level \
+; RUN:                         --compare=types \
+; RUN:                         --report=view \
+; RUN:                         --print=symbols,types \
+; RUN:                         %t.test-clang.o \
+; RUN:                         %p/../DWARF/Inputs/test-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Reference: '{{.*}}test-clang.o'
+; ONE-NEXT: Target:    'test-dwarf-gcc.o'
+; ONE-EMPTY:
+; ONE-NEXT: Logical View:
+; ONE-NEXT:  [000]           {File} '{{.*}}test-clang.o'
+; ONE-EMPTY:
+; ONE-NEXT:  [001]             {CompileUnit} 'test.cpp'
+; ONE-NEXT:  [002]     1         {TypeAlias} 'INTPTR' -> '* const int'
+; ONE-NEXT:  [002]     2         {Function} extern not_inlined 'foo' -> 'int'
+; ONE-NEXT:  [003]                 {Block}
+; ONE-NEXT:  [004]     5             {Variable} 'CONSTANT' -> 'const INTEGER'
+; ONE-NEXT: +[004]     4             {TypeAlias} 'INTEGER' -> 'int'
+; ONE-NEXT:  [003]     2           {Parameter} 'ParamBool' -> 'bool'
+; ONE-NEXT:  [003]     2           {Parameter} 'ParamPtr' -> 'INTPTR'
+; ONE-NEXT:  [003]     2           {Parameter} 'ParamUnsigned' -> 'unsigned int'
+; ONE-NEXT: -[003]     4           {TypeAlias} 'INTEGER' -> 'int'
+
+; Compare mode - Logical elements.
+; The output shows in tabular form the 'missing (-), added (+)' elements,
+; giving more context by swapping the reference and target object files.
+
+; RUN: llvm-debuginfo-analyzer --attribute=level \
+; RUN:                         --compare=types \
+; RUN:                         --report=list \
+; RUN:                         --print=symbols,types,summary \
+; RUN:                         %t.test-clang.o \
+; RUN:                         %p/../DWARF/Inputs/test-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=TWO %s
+
+; TWO:      Reference: '{{.*}}test-clang.o'
+; TWO-NEXT: Target:    'test-dwarf-gcc.o'
+; TWO-EMPTY:
+; TWO-NEXT: (1) Missing Types:
+; TWO-NEXT: -[003]     4     {TypeAlias} 'INTEGER' -> 'int'
+; TWO-EMPTY:
+; TWO-NEXT: (1) Added Types:
+; TWO-NEXT: +[004]     4     {TypeAlias} 'INTEGER' -> 'int'
+; TWO-EMPTY:
+; TWO-NEXT: ----------------------------------------
+; TWO-NEXT: Element   Expected    Missing      Added
+; TWO-NEXT: ----------------------------------------
+; TWO-NEXT: Scopes           4          0          0
+; TWO-NEXT: Symbols          0          0          0
+; TWO-NEXT: Types            2          1          1
+; TWO-NEXT: Lines            0          0          0
+; TWO-NEXT: ----------------------------------------
+; TWO-NEXT: Total            6          1          1
+
+; Changing the 'Reference' and 'Target' order:
+
+; RUN: llvm-debuginfo-analyzer --attribute=level \
+; RUN:                         --compare=types \
+; RUN:                         --report=list \
+; RUN:                         --print=symbols,types,summary \
+; RUN:                         %p/../DWARF/Inputs/test-dwarf-gcc.o \
+; RUN:                         %t.test-clang.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=THR %s
+
+; THR:      Reference: 'test-dwarf-gcc.o'
+; THR-NEXT: Target:    '{{.*}}test-clang.o'
+; THR-EMPTY:
+; THR-NEXT: (1) Missing Types:
+; THR-NEXT: -[004]     4     {TypeAlias} 'INTEGER' -> 'int'
+; THR-EMPTY:
+; THR-NEXT: (1) Added Types:
+; THR-NEXT: +[003]     4     {TypeAlias} 'INTEGER' -> 'int'
+; THR-EMPTY:
+; THR-NEXT: ----------------------------------------
+; THR-NEXT: Element   Expected    Missing      Added
+; THR-NEXT: ----------------------------------------
+; THR-NEXT: Scopes           4          0          0
+; THR-NEXT: Symbols          0          0          0
+; THR-NEXT: Types            2          1          1
+; THR-NEXT: Lines            0          0          0
+; THR-NEXT: ----------------------------------------
+; THR-NEXT: Total            6          1          1
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-print-basic-details.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-print-basic-details.test
new file mode 100644
index 0000000..4927086
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-print-basic-details.test
@@ -0,0 +1,120 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 1 - General options.
+
+; test.cpp
+;  1  using INTPTR = const int *;
+;  2  int foo(INTPTR ParamPtr, unsigned ParamUnsigned, bool ParamBool) {
+;  3    if (ParamBool) {
+;  4      typedef int INTEGER;
+;  5      const INTEGER CONSTANT = 7;
+;  6      return CONSTANT;
+;  7    }
+;  8    return ParamUnsigned;
+;  9  }
+
+; Print basic details.
+; The following command prints basic details for all the logical elements
+; sorted by the debug information internal offset; it includes its lexical
+; level and debug info format.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/test-clang.s -o %t.test-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format \
+; RUN:                         --output-sort=offset \
+; RUN:                         --print=scopes,symbols,types,lines,instructions \
+; RUN:                         %t.test-clang.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format \
+; RUN:                         --output-sort=offset \
+; RUN:                         --print=elements \
+; RUN:                         %t.test-clang.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [000]           {File} '{{.*}}test-clang.o' -> WASM
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'test.cpp'
+; ONE-NEXT: [002]     2         {Function} extern not_inlined 'foo' -> 'int'
+; ONE-NEXT: [003]     2           {Parameter} 'ParamPtr' -> 'INTPTR'
+; ONE-NEXT: [003]     2           {Parameter} 'ParamUnsigned' -> 'unsigned int'
+; ONE-NEXT: [003]     2           {Parameter} 'ParamBool' -> 'bool'
+; ONE-NEXT: [003]                 {Block}
+; ONE-NEXT: [004]     5             {Variable} 'CONSTANT' -> 'const INTEGER'
+; ONE-NEXT: [004]     5             {Line}
+; ONE-NEXT: [004]                   {Code} 'i32.const	7'
+; ONE-NEXT: [004]                   {Code} 'local.set	10'
+; ONE-NEXT: [004]                   {Code} 'local.get	5'
+; ONE-NEXT: [004]                   {Code} 'local.get	10'
+; ONE-NEXT: [004]                   {Code} 'i32.store	12'
+; ONE-NEXT: [004]     6             {Line}
+; ONE-NEXT: [004]                   {Code} 'i32.const	7'
+; ONE-NEXT: [004]                   {Code} 'local.set	11'
+; ONE-NEXT: [004]                   {Code} 'local.get	5'
+; ONE-NEXT: [004]                   {Code} 'local.get	11'
+; ONE-NEXT: [004]                   {Code} 'i32.store	28'
+; ONE-NEXT: [004]                   {Code} 'br      	1'
+; ONE-NEXT: [004]     -             {Line}
+; ONE-NEXT: [004]                   {Code} 'end'
+; ONE-NEXT: [003]     4           {TypeAlias} 'INTEGER' -> 'int'
+; ONE-NEXT: [003]     2           {Line}
+; ONE-NEXT: [003]                 {Code} 'nop'
+; ONE-NEXT: [003]                 {Code} 'end'
+; ONE-NEXT: [003]                 {Code} 'i64.div_s'
+; ONE-NEXT: [003]                 {Code} 'global.get	0'
+; ONE-NEXT: [003]                 {Code} 'local.set	3'
+; ONE-NEXT: [003]                 {Code} 'i32.const	32'
+; ONE-NEXT: [003]                 {Code} 'local.set	4'
+; ONE-NEXT: [003]                 {Code} 'local.get	3'
+; ONE-NEXT: [003]                 {Code} 'local.get	4'
+; ONE-NEXT: [003]                 {Code} 'i32.sub'
+; ONE-NEXT: [003]                 {Code} 'local.set	5'
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'local.get	0'
+; ONE-NEXT: [003]                 {Code} 'i32.store	24'
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'local.get	1'
+; ONE-NEXT: [003]                 {Code} 'i32.store	20'
+; ONE-NEXT: [003]                 {Code} 'local.get	2'
+; ONE-NEXT: [003]                 {Code} 'local.set	6'
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'local.get	6'
+; ONE-NEXT: [003]                 {Code} 'i32.store8	19'
+; ONE-NEXT: [003]     3           {Line}
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'i32.load8_u	19'
+; ONE-NEXT: [003]                 {Code} 'local.set	7'
+; ONE-NEXT: [003]     3           {Line}
+; ONE-NEXT: [003]                 {Code} 'i32.const	1'
+; ONE-NEXT: [003]                 {Code} 'local.set	8'
+; ONE-NEXT: [003]                 {Code} 'local.get	7'
+; ONE-NEXT: [003]                 {Code} 'local.get	8'
+; ONE-NEXT: [003]                 {Code} 'i32.and'
+; ONE-NEXT: [003]                 {Code} 'local.set	9'
+; ONE-NEXT: [003]                 {Code} 'block'
+; ONE-NEXT: [003]                 {Code} 'block'
+; ONE-NEXT: [003]                 {Code} 'local.get	9'
+; ONE-NEXT: [003]                 {Code} 'i32.eqz'
+; ONE-NEXT: [003]                 {Code} 'br_if   	0'
+; ONE-NEXT: [003]     8           {Line}
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'i32.load	20'
+; ONE-NEXT: [003]                 {Code} 'local.set	12'
+; ONE-NEXT: [003]     8           {Line}
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'local.get	12'
+; ONE-NEXT: [003]                 {Code} 'i32.store	28'
+; ONE-NEXT: [003]     -           {Line}
+; ONE-NEXT: [003]                 {Code} 'end'
+; ONE-NEXT: [003]     9           {Line}
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'i32.load	28'
+; ONE-NEXT: [003]                 {Code} 'local.set	13'
+; ONE-NEXT: [003]                 {Code} 'local.get	13'
+; ONE-NEXT: [003]                 {Code} 'return'
+; ONE-NEXT: [003]                 {Code} 'end'
+; ONE-NEXT: [003]     9           {Line}
+; ONE-NEXT: [003]                 {Code} 'unreachable'
+; ONE-NEXT: [002]     1         {TypeAlias} 'INTPTR' -> '* const int'
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-select-logical-elements.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-select-logical-elements.test
new file mode 100644
index 0000000..f50cc2d
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/01-wasm-select-logical-elements.test
@@ -0,0 +1,76 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 1 - General options
+
+; test.cpp
+;  1  using INTPTR = const int *;
+;  2  int foo(INTPTR ParamPtr, unsigned ParamUnsigned, bool ParamBool) {
+;  3    if (ParamBool) {
+;  4      typedef int INTEGER;
+;  5      const INTEGER CONSTANT = 7;
+;  6      return CONSTANT;
+;  7    }
+;  8    return ParamUnsigned;
+;  9  }
+
+; Select logical elements.
+; The following prints all 'instructions', 'symbols' and 'types' that
+; contain 'BLOCK' or '.store' in their names or types, using a tab layout
+; and given the number of matches.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/test-clang.s -o %t.test-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level \
+; RUN:                         --select-nocase --select-regex \
+; RUN:                         --select=BLOCK --select=.store \
+; RUN:                         --report=list \
+; RUN:                         --print=symbols,types,instructions,summary \
+; RUN:                         %t.test-clang.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [000]           {File} '{{.*}}test-clang.o'
+; ONE-EMPTY:
+; ONE-NEXT: [001]           {CompileUnit} 'test.cpp'
+; ONE-NEXT: [003]           {Code} 'block'
+; ONE-NEXT: [003]           {Code} 'block'
+; ONE-NEXT: [004]           {Code} 'i32.store	12'
+; ONE-NEXT: [003]           {Code} 'i32.store	20'
+; ONE-NEXT: [003]           {Code} 'i32.store	24'
+; ONE-NEXT: [004]           {Code} 'i32.store	28'
+; ONE-NEXT: [003]           {Code} 'i32.store	28'
+; ONE-NEXT: [003]           {Code} 'i32.store8	19'
+; ONE-EMPTY:
+; ONE-NEXT: -----------------------------
+; ONE-NEXT: Element      Total    Printed
+; ONE-NEXT: -----------------------------
+; ONE-NEXT: Scopes           3          0
+; ONE-NEXT: Symbols          4          0
+; ONE-NEXT: Types            2          0
+; ONE-NEXT: Lines           62          8
+; ONE-NEXT: -----------------------------
+; ONE-NEXT: Total           71          8
+
+; RUN: llvm-debuginfo-analyzer --attribute=level \
+; RUN:                         --select-regex --select-nocase \
+; RUN:                         --select=INTe \
+; RUN:                         --report=list \
+; RUN:                         --print=symbols,types \
+; RUN:                         %t.test-clang.o \
+; RUN:                         %p/../DWARF/Inputs/test-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=TWO %s
+
+; TWO:      Logical View:
+; TWO-NEXT: [000]           {File} '{{.*}}test-clang.o'
+; TWO-EMPTY:
+; TWO-NEXT: [001]           {CompileUnit} 'test.cpp'
+; TWO-NEXT: [003]     4     {TypeAlias} 'INTEGER' -> 'int'
+; TWO-NEXT: [004]     5     {Variable} 'CONSTANT' -> 'const INTEGER'
+; TWO-EMPTY:
+; TWO-NEXT: Logical View:
+; TWO-NEXT: [000]           {File} 'test-dwarf-gcc.o'
+; TWO-EMPTY:
+; TWO-NEXT: [001]           {CompileUnit} 'test.cpp'
+; TWO-NEXT: [004]     4     {TypeAlias} 'INTEGER' -> 'int'
+; TWO-NEXT: [004]     5     {Variable} 'CONSTANT' -> 'const INTEGER'
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/02-wasm-logical-lines.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/02-wasm-logical-lines.test
new file mode 100644
index 0000000..101f6ab
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/02-wasm-logical-lines.test
@@ -0,0 +1,74 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 2 - Assembler instructions.
+
+; hello-world.cpp
+;  1  extern int printf(const char * format, ... );
+;  2
+;  3  int main()
+;  4  {
+;  5    printf("Hello, World\n");
+;  6    return 0;
+;  7  }
+
+; Logical lines.
+; The logical views shows the intermixed lines and assembler instructions,
+; allowing to compare the code generated by the different toolchains.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/hello-world-clang.s -o %t.hello-world-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
+; RUN:                         --print=lines,instructions \
+; RUN:                         %t.hello-world-clang.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [000]           {File} '{{.*}}hello-world-clang.o' -> WASM
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'hello-world.cpp'
+; ONE-NEXT: [002]               {Producer} 'clang version 19{{.*}}'
+; ONE-NEXT: [002]     3         {Function} extern not_inlined 'main' -> 'int'
+; ONE-NEXT: [003]     4           {Line}
+; ONE-NEXT: [003]                 {Code} 'nop'
+; ONE-NEXT: [003]                 {Code} 'rethrow 	127'
+; ONE-NEXT: [003]                 {Code} 'global.get	0'
+; ONE-NEXT: [003]                 {Code} 'local.set	0'
+; ONE-NEXT: [003]                 {Code} 'i32.const	16'
+; ONE-NEXT: [003]                 {Code} 'local.set	1'
+; ONE-NEXT: [003]                 {Code} 'local.get	0'
+; ONE-NEXT: [003]                 {Code} 'local.get	1'
+; ONE-NEXT: [003]                 {Code} 'i32.sub'
+; ONE-NEXT: [003]                 {Code} 'local.set	2'
+; ONE-NEXT: [003]                 {Code} 'local.get	2'
+; ONE-NEXT: [003]                 {Code} 'global.set	0'
+; ONE-NEXT: [003]                 {Code} 'i32.const	0'
+; ONE-NEXT: [003]                 {Code} 'local.set	3'
+; ONE-NEXT: [003]                 {Code} 'local.get	2'
+; ONE-NEXT: [003]                 {Code} 'local.get	3'
+; ONE-NEXT: [003]                 {Code} 'i32.store	12'
+; ONE-NEXT: [003]     5           {Line}
+; ONE-NEXT: [003]                 {Code} 'i32.const	0'
+; ONE-NEXT: [003]                 {Code} 'local.set	4'
+; ONE-NEXT: [003]                 {Code} 'i32.const	0'
+; ONE-NEXT: [003]                 {Code} 'local.set	5'
+; ONE-NEXT: [003]                 {Code} 'local.get	4'
+; ONE-NEXT: [003]                 {Code} 'local.get	5'
+; ONE-NEXT: [003]                 {Code} 'call	0'
+; ONE-NEXT: [003]                 {Code} 'drop'
+; ONE-NEXT: [003]     6           {Line}
+; ONE-NEXT: [003]                 {Code} 'i32.const	0'
+; ONE-NEXT: [003]                 {Code} 'local.set	6'
+; ONE-NEXT: [003]                 {Code} 'i32.const	16'
+; ONE-NEXT: [003]                 {Code} 'local.set	7'
+; ONE-NEXT: [003]                 {Code} 'local.get	2'
+; ONE-NEXT: [003]                 {Code} 'local.get	7'
+; ONE-NEXT: [003]                 {Code} 'i32.add'
+; ONE-NEXT: [003]                 {Code} 'local.set	8'
+; ONE-NEXT: [003]                 {Code} 'local.get	8'
+; ONE-NEXT: [003]                 {Code} 'global.set	0'
+; ONE-NEXT: [003]                 {Code} 'local.get	6'
+; ONE-NEXT: [003]                 {Code} 'return'
+; ONE-NEXT: [003]                 {Code} 'end'
+; ONE-NEXT: [003]     6           {Line}
+; ONE-NEXT: [003]                 {Code} 'return'
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/03-wasm-incorrect-lexical-scope-typedef.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/03-wasm-incorrect-lexical-scope-typedef.test
new file mode 100644
index 0000000..eb05eca
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/03-wasm-incorrect-lexical-scope-typedef.test
@@ -0,0 +1,135 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 3 - Incorrect lexical scope for typedef.
+
+; pr-44884.cpp
+;  1  int bar(float Input) { return (int)Input; }
+;  2
+;  3  unsigned foo(char Param) {
+;  4    typedef int INT;                // ** Definition for INT **
+;  5    INT Value = Param;
+;  6    {
+;  7      typedef float FLOAT;          // ** Definition for FLOAT **
+;  8      {
+;  9        FLOAT Added = Value + Param;
+; 10        Value = bar(Added);
+; 11      }
+; 12    }
+; 13    return Value + Param;
+; 14  }
+
+; The lines 4 and 7 contains 2 typedefs, defined at different lexical
+; scopes.
+
+; The above test is used to illustrates a scope issue found in the
+; Clang compiler.
+; PR44884: https://bugs.llvm.org/show_bug.cgi?id=44884
+; PR44229: https://github.com/llvm/llvm-project/issues/44229
+
+; In the following logical views, we can see that the Clang compiler
+; emits both typedefs at the same lexical scope (3), which is wrong.
+; GCC emit correct lexical scope for both typedefs.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/pr-44884-clang.s -o %t.pr-44884-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
+; RUN:                         --output-sort=kind \
+; RUN:                         --print=symbols,types,lines \
+; RUN:                         %t.pr-44884-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-44884-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [000]           {File} '{{.*}}pr-44884-clang.o' -> WASM
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'pr-44884.cpp'
+; ONE-NEXT: [002]               {Producer} 'clang version 19{{.*}}'
+; ONE-NEXT: [002]     1         {Function} extern not_inlined 'bar' -> 'int'
+; ONE-NEXT: [003]     1           {Parameter} 'Input' -> 'float'
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     -           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     -           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [002]     3         {Function} extern not_inlined 'foo' -> 'unsigned int'
+; ONE-NEXT: [003]                 {Block}
+; ONE-NEXT: [004]     9             {Variable} 'Added' -> 'FLOAT'
+; ONE-NEXT: [004]     9             {Line}
+; ONE-NEXT: [004]     9             {Line}
+; ONE-NEXT: [004]     9             {Line}
+; ONE-NEXT: [004]     9             {Line}
+; ONE-NEXT: [004]     9             {Line}
+; ONE-NEXT: [004]    10             {Line}
+; ONE-NEXT: [004]    10             {Line}
+; ONE-NEXT: [004]    10             {Line}
+; ONE-NEXT: [004]    13             {Line}
+; ONE-NEXT: [003]     3           {Parameter} 'Param' -> 'char'
+; ONE-NEXT: [003]     7           {TypeAlias} 'FLOAT' -> 'float'
+; ONE-NEXT: [003]     4           {TypeAlias} 'INT' -> 'int'
+; ONE-NEXT: [003]     5           {Variable} 'Value' -> 'INT'
+; ONE-NEXT: [003]     3           {Line}
+; ONE-NEXT: [003]     5           {Line}
+; ONE-NEXT: [003]     5           {Line}
+; ONE-NEXT: [003]    13           {Line}
+; ONE-NEXT: [003]    13           {Line}
+; ONE-NEXT: [003]    13           {Line}
+; ONE-NEXT: [003]    13           {Line}
+; ONE-EMPTY:
+; ONE-NEXT: Logical View:
+; ONE-NEXT: [000]           {File} 'pr-44884-dwarf-gcc.o' -> elf64-x86-64
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'pr-44884.cpp'
+; ONE-NEXT: [002]               {Producer} 'GNU C++14 10.3.0 {{.*}}'
+; ONE-NEXT: [002]     1         {Function} extern not_inlined 'bar' -> 'int'
+; ONE-NEXT: [003]     1           {Parameter} 'Input' -> 'float'
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [003]     1           {Line}
+; ONE-NEXT: [002]     3         {Function} extern not_inlined 'foo' -> 'unsigned int'
+; ONE-NEXT: [003]                 {Block}
+; ONE-NEXT: [004]                   {Block}
+; ONE-NEXT: [005]     9               {Variable} 'Added' -> 'FLOAT'
+; ONE-NEXT: [005]     9               {Line}
+; ONE-NEXT: [005]     9               {Line}
+; ONE-NEXT: [005]     9               {Line}
+; ONE-NEXT: [005]    10               {Line}
+; ONE-NEXT: [005]    13               {Line}
+; ONE-NEXT: [004]     7             {TypeAlias} 'FLOAT' -> 'float'
+; ONE-NEXT: [003]     3           {Parameter} 'Param' -> 'char'
+; ONE-NEXT: [003]     4           {TypeAlias} 'INT' -> 'int'
+; ONE-NEXT: [003]     5           {Variable} 'Value' -> 'INT'
+; ONE-NEXT: [003]     3           {Line}
+; ONE-NEXT: [003]     5           {Line}
+; ONE-NEXT: [003]    13           {Line}
+; ONE-NEXT: [003]    14           {Line}
+; ONE-NEXT: [003]    14           {Line}
+
+; Using the selection facilities, we can produce a simple tabular
+; output showing just the logical types that are 'Typedef'.
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format \
+; RUN:                         --output-sort=name \
+; RUN:                         --select-types=Typedef \
+; RUN:                         --report=list \
+; RUN:                         --print=types \
+; RUN:                         %t.pr-44884-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-44884-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=TWO %s
+
+; TWO:      Logical View:
+; TWO-NEXT: [000]           {File} '{{.*}}pr-44884-clang.o' -> WASM
+; TWO-EMPTY:
+; TWO-NEXT: [001]           {CompileUnit} 'pr-44884.cpp'
+; TWO-NEXT: [003]     7     {TypeAlias} 'FLOAT' -> 'float'
+; TWO-NEXT: [003]     4     {TypeAlias} 'INT' -> 'int'
+; TWO-EMPTY:
+; TWO-NEXT: Logical View:
+; TWO-NEXT: [000]           {File} 'pr-44884-dwarf-gcc.o' -> elf64-x86-64
+; TWO-EMPTY:
+; TWO-NEXT: [001]           {CompileUnit} 'pr-44884.cpp'
+; TWO-NEXT: [004]     7     {TypeAlias} 'FLOAT' -> 'float'
+; TWO-NEXT: [003]     4     {TypeAlias} 'INT' -> 'int'
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/04-wasm-missing-nested-enumerators.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/04-wasm-missing-nested-enumerators.test
new file mode 100644
index 0000000..cafa51c
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/04-wasm-missing-nested-enumerators.test
@@ -0,0 +1,130 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 4 - Missing nested enumerations.
+
+; pr-46466.cpp
+;   1  struct Struct {
+;   2    union Union {
+;   3      enum NestedEnum { RED, BLUE };
+;   4    };
+;   5    Union U;
+;   6  };
+;   7
+;   8  Struct S;
+;   9  int test() {
+;  10    return S.U.BLUE;
+;  11  }
+
+; The above test is used to illustrate a scope issue found in the Clang
+; compiler.
+; PR46466: https://bugs.llvm.org/show_bug.cgi?id=46466
+; PR45811: https://github.com/llvm/llvm-project/issues/45811
+
+; In the following logical views, we can see that the DWARF debug
+; information generated by the Clang compiler does not include any
+; references to the enumerators 'RED' and 'BLUE'. The DWARF generated
+; by GCC, does include such references.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/pr-46466-clang.s -o %t.pr-46466-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
+; RUN:                         --output-sort=name \
+; RUN:                         --print=symbols,types \
+; RUN:                         %t.pr-46466-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-46466-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [000]           {File} '{{.*}}pr-46466-clang.o' -> WASM
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'pr-46466.cpp'
+; ONE-NEXT: [002]               {Producer} 'clang version 19{{.*}}'
+; ONE-NEXT: [002]     8         {Variable} extern 'S' -> 'Struct'
+; ONE-NEXT: [002]     1         {Struct} 'Struct'
+; ONE-NEXT: [003]     5           {Member} public 'U' -> 'Union'
+; ONE-EMPTY:
+; ONE-NEXT: Logical View:
+; ONE-NEXT: [000]           {File} 'pr-46466-dwarf-gcc.o' -> elf64-x86-64
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'pr-46466.cpp'
+; ONE-NEXT: [002]               {Producer} 'GNU C++14 10.3.0 {{.*}}'
+; ONE-NEXT: [002]     8         {Variable} extern 'S' -> 'Struct'
+; ONE-NEXT: [002]     1         {Struct} 'Struct'
+; ONE-NEXT: [003]     5           {Member} public 'U' -> 'Union'
+; ONE-NEXT: [003]     2           {Union} 'Union'
+; ONE-NEXT: [004]     3             {Enumeration} 'NestedEnum' -> 'unsigned int'
+; ONE-NEXT: [005]                     {Enumerator} 'BLUE' = '0x1'
+; ONE-NEXT: [005]                     {Enumerator} 'RED' = '0x0'
+
+; Using the selection facilities, we can produce a logical view
+; showing just the logical types that are 'Enumerator' and its
+; parents. The logical view is sorted by the types name.
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format \
+; RUN:                         --output-sort=name \
+; RUN:                         --select-types=Enumerator \
+; RUN:                         --report=parents \
+; RUN:                         --print=types \
+; RUN:                         %t.pr-46466-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-46466-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=TWO %s
+
+; TWO:      Logical View:
+; TWO-NEXT: [000]           {File} '{{.*}}pr-46466-clang.o' -> WASM
+; TWO-EMPTY:
+; TWO-NEXT: [001]             {CompileUnit} 'pr-46466.cpp'
+; TWO-EMPTY:
+; TWO-NEXT: Logical View:
+; TWO-NEXT: [000]           {File} 'pr-46466-dwarf-gcc.o' -> elf64-x86-64
+; TWO-EMPTY:
+; TWO-NEXT: [001]             {CompileUnit} 'pr-46466.cpp'
+; TWO-NEXT: [002]     1         {Struct} 'Struct'
+; TWO-NEXT: [003]     2           {Union} 'Union'
+; TWO-NEXT: [004]     3             {Enumeration} 'NestedEnum' -> 'unsigned int'
+; TWO-NEXT: [005]                     {Enumerator} 'BLUE' = '0x1'
+; TWO-NEXT: [005]                     {Enumerator} 'RED' = '0x0'
+
+; Using the selection facilities, we can produce a simple tabular output
+; including a summary for the logical types that are 'Enumerator'. The
+; logical view is sorted by the types name.
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format \
+; RUN:                         --output-sort=name \
+; RUN:                         --select-types=Enumerator \
+; RUN:                         --print=types,summary \
+; RUN:                         %t.pr-46466-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-46466-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=THR %s
+
+; THR:      Logical View:
+; THR-NEXT: [000]           {File} '{{.*}}pr-46466-clang.o' -> WASM
+; THR-EMPTY:
+; THR-NEXT: [001]           {CompileUnit} 'pr-46466.cpp'
+; THR-EMPTY:
+; THR-NEXT: -----------------------------
+; THR-NEXT: Element      Total    Printed
+; THR-NEXT: -----------------------------
+; THR-NEXT: Scopes           4          0
+; THR-NEXT: Symbols          0          0
+; THR-NEXT: Types            0          0
+; THR-NEXT: Lines            0          0
+; THR-NEXT: -----------------------------
+; THR-NEXT: Total            4          0
+; THR-EMPTY:
+; THR-NEXT: Logical View:
+; THR-NEXT: [000]           {File} 'pr-46466-dwarf-gcc.o' -> elf64-x86-64
+; THR-EMPTY:
+; THR-NEXT: [001]           {CompileUnit} 'pr-46466.cpp'
+; THR-NEXT: [005]           {Enumerator} 'BLUE' = '0x1'
+; THR-NEXT: [005]           {Enumerator} 'RED' = '0x0'
+; THR-EMPTY:
+; THR-NEXT: -----------------------------
+; THR-NEXT: Element      Total    Printed
+; THR-NEXT: -----------------------------
+; THR-NEXT: Scopes           5          0
+; THR-NEXT: Symbols          0          0
+; THR-NEXT: Types            2          2
+; THR-NEXT: Lines            0          0
+; THR-NEXT: -----------------------------
+; THR-NEXT: Total            7          2
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/05-wasm-incorrect-lexical-scope-variable.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/05-wasm-incorrect-lexical-scope-variable.test
new file mode 100644
index 0000000..4348161
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/05-wasm-incorrect-lexical-scope-variable.test
@@ -0,0 +1,114 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 5 - Incorrect lexical scope variable.
+
+; pr-43860.cpp
+;  1  #include "definitions.h"
+;  2  forceinline int InlineFunction(int Param) {
+;  3    int Var_1 = Param;
+;  4    {
+;  5      int Var_2 = Param + Var_1;
+;  6      Var_1 = Var_2;
+;  7    }
+;  8    return Var_1;
+;  9  }
+; 10
+; 11  int test(int Param_1, int Param_2) {
+; 12    int A = Param_1;
+; 13    A += InlineFunction(Param_2);
+; 14    return A;
+; 15  }
+
+; The above test is used to illustrate a variable issue found in the
+; Clang compiler.
+; PR43860: https://bugs.llvm.org/show_bug.cgi?id=43860
+; PR43205: https://github.com/llvm/llvm-project/issues/43205
+
+; In the following logical views, we can see that the DWARF debug
+; information generated by the Clang compiler shows the variables
+; 'Var_1' and 'Var_2' are at the same lexical scope (4) in the function
+; 'InlineFuction'.
+; The DWARF generated by GCC/Clang show those variables at the correct
+; lexical scope: '3' and '4' respectively.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/pr-43860-clang.s -o %t.pr-43860-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
+; RUN:                         --output-sort=name \
+; RUN:                         --print=symbols \
+; RUN:                         %t.pr-43860-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-43860-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [000]           {File} '{{.*}}pr-43860-clang.o' -> WASM
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'pr-43860.cpp'
+; ONE-NEXT: [002]               {Producer} 'clang version 19{{.*}}'
+; ONE-NEXT: [002]     2         {Function} extern inlined 'InlineFunction' -> 'int'
+; ONE-NEXT: [003]                 {Block}
+; ONE-NEXT: [004]     5             {Variable} 'Var_2' -> 'int'
+; ONE-NEXT: [003]     2           {Parameter} 'Param' -> 'int'
+; ONE-NEXT: [003]     3           {Variable} 'Var_1' -> 'int'
+; ONE-NEXT: [002]    11         {Function} extern not_inlined 'test' -> 'int'
+; ONE-NEXT: [003]    12           {Variable} 'A' -> 'int'
+; ONE-NEXT: [003]    13           {InlinedFunction} inlined 'InlineFunction' -> 'int'
+; ONE-NEXT: [004]                   {Block}
+; ONE-NEXT: [005]                     {Variable} 'Var_2' -> 'int'
+; ONE-NEXT: [004]                   {Parameter} 'Param' -> 'int'
+; ONE-NEXT: [004]                   {Variable} 'Var_1' -> 'int'
+; ONE-NEXT: [003]    11           {Parameter} 'Param_1' -> 'int'
+; ONE-NEXT: [003]    11           {Parameter} 'Param_2' -> 'int'
+; ONE-EMPTY:
+; ONE-NEXT: Logical View:
+; ONE-NEXT: [000]           {File} 'pr-43860-dwarf-gcc.o' -> elf64-x86-64
+; ONE-EMPTY:
+; ONE-NEXT: [001]             {CompileUnit} 'pr-43860.cpp'
+; ONE-NEXT: [002]               {Producer} 'GNU C++14 10.3.0 {{.*}}'
+; ONE-NEXT: [002]     2         {Function} extern declared_inlined 'InlineFunction' -> 'int'
+; ONE-NEXT: [003]                 {Block}
+; ONE-NEXT: [004]     5             {Variable} 'Var_2' -> 'int'
+; ONE-NEXT: [003]     2           {Parameter} 'Param' -> 'int'
+; ONE-NEXT: [003]     3           {Variable} 'Var_1' -> 'int'
+; ONE-NEXT: [002]    11         {Function} extern not_inlined 'test' -> 'int'
+; ONE-NEXT: [003]    12           {Variable} 'A' -> 'int'
+; ONE-NEXT: [003]    13           {InlinedFunction} declared_inlined 'InlineFunction' -> 'int'
+; ONE-NEXT: [004]                   {Block}
+; ONE-NEXT: [005]                     {Variable} 'Var_2' -> 'int'
+; ONE-NEXT: [004]                   {Parameter} 'Param' -> 'int'
+; ONE-NEXT: [004]                   {Variable} 'Var_1' -> 'int'
+; ONE-NEXT: [003]    11           {Parameter} 'Param_1' -> 'int'
+; ONE-NEXT: [003]    11           {Parameter} 'Param_2' -> 'int'
+
+; Using the selection facilities, we can produce a simple tabular output
+; showing just the logical elements that have in their name the 'var'
+; pattern. The logical view is sorted by the variables name.
+
+; RUN: llvm-debuginfo-analyzer --attribute=level,format \
+; RUN:                         --output-sort=name \
+; RUN:                         --select-regex --select-nocase \
+; RUN:                         --select=Var \
+; RUN:                         --report=list \
+; RUN:                         --print=symbols \
+; RUN:                         %t.pr-43860-clang.o \
+; RUN:                         %p/../DWARF/Inputs/pr-43860-dwarf-gcc.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=TWO %s
+
+; TWO:      Logical View:
+; TWO-NEXT: [000]           {File} '{{.*}}pr-43860-clang.o' -> WASM
+; TWO-EMPTY:
+; TWO-NEXT: [001]           {CompileUnit} 'pr-43860.cpp'
+; TWO-NEXT: [004]           {Variable} 'Var_1' -> 'int'
+; TWO-NEXT: [003]     3     {Variable} 'Var_1' -> 'int'
+; TWO-NEXT: [005]           {Variable} 'Var_2' -> 'int'
+; TWO-NEXT: [004]     5     {Variable} 'Var_2' -> 'int'
+; TWO-EMPTY:
+; TWO-NEXT: Logical View:
+; TWO-NEXT: [000]           {File} 'pr-43860-dwarf-gcc.o' -> elf64-x86-64
+; TWO-EMPTY:
+; TWO-NEXT: [001]           {CompileUnit} 'pr-43860.cpp'
+; TWO-NEXT: [004]           {Variable} 'Var_1' -> 'int'
+; TWO-NEXT: [003]     3     {Variable} 'Var_1' -> 'int'
+; TWO-NEXT: [005]           {Variable} 'Var_2' -> 'int'
+; TWO-NEXT: [004]     5     {Variable} 'Var_2' -> 'int'
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/06-wasm-full-logical-view.test b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/06-wasm-full-logical-view.test
new file mode 100644
index 0000000..81b78ba
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/06-wasm-full-logical-view.test
@@ -0,0 +1,158 @@
+; REQUIRES: webassembly-registered-target
+
+; Test case 6 - Full logical view
+
+; test.cpp
+;  1  using INTPTR = const int *;
+;  2  int foo(INTPTR ParamPtr, unsigned ParamUnsigned, bool ParamBool) {
+;  3    if (ParamBool) {
+;  4      typedef int INTEGER;
+;  5      const INTEGER CONSTANT = 7;
+;  6      return CONSTANT;
+;  7    }
+;  8    return ParamUnsigned;
+;  9  }
+
+; Print low level details.
+; The following command prints low level information that includes
+; offsets within the debug information section, debug location
+; operands, linkage names, etc.
+
+; RUN: llvm-mc -arch=wasm32 -filetype=obj \
+; RUN:         %p/Inputs/test-clang.s -o %t.test-clang.o
+
+; RUN: llvm-debuginfo-analyzer --attribute=all \
+; RUN:                         --print=all \
+; RUN:                         %t.test-clang.o 2>&1 | \
+; RUN: FileCheck --strict-whitespace -check-prefix=ONE %s
+
+; ONE:      Logical View:
+; ONE-NEXT: [0x0000000000][000]            {File} '{{.*}}test-clang.o' -> WASM
+; ONE-EMPTY:
+; ONE-NEXT: [0x000000000b][001]              {CompileUnit} 'test.cpp'
+; ONE-NEXT: [0x000000000b][002]                {Producer} 'clang version 19{{.*}}'
+; ONE-NEXT:                                    {Directory} '{{.*}}/general'
+; ONE-NEXT:                                    {File} 'test.cpp'
+; ONE-NEXT:                                    {Public} 'foo' [0x0000000002:0x000000007f]
+; ONE-NEXT: [0x000000000b][002]                {Range} Lines 2:9 [0x0000000002:0x000000007f]
+; ONE-NEXT: [0x00000000b3][002]                {BaseType} 'bool'
+; ONE-NEXT: [0x0000000090][002]                {BaseType} 'int'
+; ONE-NEXT: [0x00000000ac][002]                {BaseType} 'unsigned int'
+; ONE-EMPTY:
+; ONE-NEXT: [0x0000000097][002]   {Source} '{{.*}}general/test.cpp'
+; ONE-NEXT: [0x0000000097][002]      1         {TypeAlias} 'INTPTR' -> [0x00000000a2]'* const int'
+; ONE-NEXT: [0x0000000026][002]      2         {Function} extern not_inlined 'foo' -> [0x0000000090]'int'
+; ONE-NEXT: [0x0000000026][003]                  {Range} Lines 2:9 [0x0000000002:0x000000007f]
+; ONE-NEXT: [0x0000000026][003]                  {Linkage}  0x3 '_Z3fooPKijb'
+; ONE-NEXT: [0x000000006c][003]                  {Block}
+; ONE-NEXT: [0x000000006c][004]                    {Range} Lines 5:0 [0x000000004c:0x0000000064]
+; ONE-NEXT: [0x0000000075][004]      5             {Variable} 'CONSTANT' -> [0x00000000ba]'const INTEGER'
+; ONE-NEXT: [0x0000000075][005]                      {Coverage} 100.00%
+; ONE-NEXT: [0x0000000076][005]                      {Location}
+; ONE-NEXT: [0x0000000076][006]                        {Entry} fbreg 12
+; ONE-NEXT: [0x000000004c][004]      5             {Line} {NewStatement} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x000000004c][004]                    {Code} 'i32.const	7'
+; ONE-NEXT: [0x000000004e][004]                    {Code} 'local.set	10'
+; ONE-NEXT: [0x0000000050][004]                    {Code} 'local.get	5'
+; ONE-NEXT: [0x0000000052][004]                    {Code} 'local.get	10'
+; ONE-NEXT: [0x0000000054][004]                    {Code} 'i32.store	12'
+; ONE-NEXT: [0x0000000057][004]      6             {Line} {NewStatement} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000057][004]                    {Code} 'i32.const	7'
+; ONE-NEXT: [0x0000000059][004]                    {Code} 'local.set	11'
+; ONE-NEXT: [0x000000005b][004]                    {Code} 'local.get	5'
+; ONE-NEXT: [0x000000005d][004]                    {Code} 'local.get	11'
+; ONE-NEXT: [0x000000005f][004]                    {Code} 'i32.store	28'
+; ONE-NEXT: [0x0000000062][004]                    {Code} 'br      	1'
+; ONE-NEXT: [0x0000000064][004]      0             {Line} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000064][004]                    {Code} 'end'
+; ONE-NEXT: [0x000000005e][003]      2           {Parameter} 'ParamBool' -> [0x00000000b3]'bool'
+; ONE-NEXT: [0x000000005e][004]                    {Coverage} 100.00%
+; ONE-NEXT: [0x000000005f][004]                    {Location}
+; ONE-NEXT: [0x000000005f][005]                      {Entry} fbreg 19
+; ONE-NEXT: [0x0000000042][003]      2           {Parameter} 'ParamPtr' -> [0x0000000097]'INTPTR'
+; ONE-NEXT: [0x0000000042][004]                    {Coverage} 100.00%
+; ONE-NEXT: [0x0000000043][004]                    {Location}
+; ONE-NEXT: [0x0000000043][005]                      {Entry} fbreg 24
+; ONE-NEXT: [0x0000000050][003]      2           {Parameter} 'ParamUnsigned' -> [0x00000000ac]'unsigned int'
+; ONE-NEXT: [0x0000000050][004]                    {Coverage} 100.00%
+; ONE-NEXT: [0x0000000051][004]                    {Location}
+; ONE-NEXT: [0x0000000051][005]                      {Entry} fbreg 20
+; ONE-NEXT: [0x0000000084][003]      4           {TypeAlias} 'INTEGER' -> [0x0000000090]'int'
+; ONE-NEXT: [0x0000000002][003]      2           {Line} {NewStatement} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000002][003]                  {Code} 'nop'
+; ONE-NEXT: [0x0000000003][003]                  {Code} 'end'
+; ONE-NEXT: [0x0000000004][003]                  {Code} 'i64.div_s'
+; ONE-NEXT: [0x0000000005][003]                  {Code} 'global.get	0'
+; ONE-NEXT: [0x000000000b][003]                  {Code} 'local.set	3'
+; ONE-NEXT: [0x000000000d][003]                  {Code} 'i32.const	32'
+; ONE-NEXT: [0x000000000f][003]                  {Code} 'local.set	4'
+; ONE-NEXT: [0x0000000011][003]                  {Code} 'local.get	3'
+; ONE-NEXT: [0x0000000013][003]                  {Code} 'local.get	4'
+; ONE-NEXT: [0x0000000015][003]                  {Code} 'i32.sub'
+; ONE-NEXT: [0x0000000016][003]                  {Code} 'local.set	5'
+; ONE-NEXT: [0x0000000018][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x000000001a][003]                  {Code} 'local.get	0'
+; ONE-NEXT: [0x000000001c][003]                  {Code} 'i32.store	24'
+; ONE-NEXT: [0x000000001f][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x0000000021][003]                  {Code} 'local.get	1'
+; ONE-NEXT: [0x0000000023][003]                  {Code} 'i32.store	20'
+; ONE-NEXT: [0x0000000026][003]                  {Code} 'local.get	2'
+; ONE-NEXT: [0x0000000028][003]                  {Code} 'local.set	6'
+; ONE-NEXT: [0x000000002a][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x000000002c][003]                  {Code} 'local.get	6'
+; ONE-NEXT: [0x000000002e][003]                  {Code} 'i32.store8	19'
+; ONE-NEXT: [0x0000000031][003]      3           {Line} {NewStatement} {PrologueEnd} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000031][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x0000000033][003]                  {Code} 'i32.load8_u	19'
+; ONE-NEXT: [0x0000000036][003]                  {Code} 'local.set	7'
+; ONE-NEXT: [0x0000000038][003]      3           {Line} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000038][003]                  {Code} 'i32.const	1'
+; ONE-NEXT: [0x000000003a][003]                  {Code} 'local.set	8'
+; ONE-NEXT: [0x000000003c][003]                  {Code} 'local.get	7'
+; ONE-NEXT: [0x000000003e][003]                  {Code} 'local.get	8'
+; ONE-NEXT: [0x0000000040][003]                  {Code} 'i32.and'
+; ONE-NEXT: [0x0000000041][003]                  {Code} 'local.set	9'
+; ONE-NEXT: [0x0000000043][003]                  {Code} 'block'
+; ONE-NEXT: [0x0000000045][003]                  {Code} 'block'
+; ONE-NEXT: [0x0000000047][003]                  {Code} 'local.get	9'
+; ONE-NEXT: [0x0000000049][003]                  {Code} 'i32.eqz'
+; ONE-NEXT: [0x000000004a][003]                  {Code} 'br_if   	0'
+; ONE-NEXT: [0x0000000065][003]      8           {Line} {NewStatement} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000065][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x0000000067][003]                  {Code} 'i32.load	20'
+; ONE-NEXT: [0x000000006a][003]                  {Code} 'local.set	12'
+; ONE-NEXT: [0x000000006c][003]      8           {Line} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x000000006c][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x000000006e][003]                  {Code} 'local.get	12'
+; ONE-NEXT: [0x0000000070][003]                  {Code} 'i32.store	28'
+; ONE-NEXT: [0x0000000073][003]      0           {Line} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000073][003]                  {Code} 'end'
+; ONE-NEXT: [0x0000000074][003]      9           {Line} {NewStatement} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x0000000074][003]                  {Code} 'local.get	5'
+; ONE-NEXT: [0x0000000076][003]                  {Code} 'i32.load	28'
+; ONE-NEXT: [0x0000000079][003]                  {Code} 'local.set	13'
+; ONE-NEXT: [0x000000007b][003]                  {Code} 'local.get	13'
+; ONE-NEXT: [0x000000007d][003]                  {Code} 'return'
+; ONE-NEXT: [0x000000007e][003]                  {Code} 'end'
+; ONE-NEXT: [0x000000007f][003]      9           {Line} {NewStatement} {EndSequence} '{{.*}}/general/test.cpp'
+; ONE-NEXT: [0x000000007f][003]                  {Code} 'unreachable'
+; ONE-EMPTY:
+; ONE-NEXT: -----------------------------
+; ONE-NEXT: Element      Total    Printed
+; ONE-NEXT: -----------------------------
+; ONE-NEXT: Scopes           3          3
+; ONE-NEXT: Symbols          4          4
+; ONE-NEXT: Types            5          5
+; ONE-NEXT: Lines           73         73
+; ONE-NEXT: -----------------------------
+; ONE-NEXT: Total           85         85
+; ONE-EMPTY:
+; ONE-NEXT: Scope Sizes:
+; ONE-NEXT:        180 (100.00%) : [0x000000000b][001]              {CompileUnit} 'test.cpp'
+; ONE-NEXT:        105 ( 58.33%) : [0x0000000026][002]      2         {Function} extern not_inlined 'foo' -> [0x0000000090]'int'
+; ONE-NEXT:         23 ( 12.78%) : [0x000000006c][003]                  {Block}
+; ONE-EMPTY:
+; ONE-NEXT: Totals by lexical level:
+; ONE-NEXT: [001]:        180 (100.00%)
+; ONE-NEXT: [002]:        105 ( 58.33%)
+; ONE-NEXT: [003]:         23 ( 12.78%)
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/definitions.h b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/definitions.h
new file mode 100644
index 0000000..dfbd3db
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/definitions.h
@@ -0,0 +1,30 @@
+//-----------------------------------------------------------------------------
+// Definitions.
+//-----------------------------------------------------------------------------
+#ifndef SUITE_DEFINITIONS_H
+#define SUITE_DEFINITIONS_H
+
+#ifdef _MSC_VER
+#define forceinline __forceinline
+#define OPTIMIZE_OFF __pragma(optimize("", off))
+#define OPTIMIZE_ON  __pragma(optimize("", on))
+#elif defined(__clang__)
+#if __has_attribute(__always_inline__)
+#define forceinline inline __attribute__((__always_inline__))
+#else
+#define forceinline inline
+#endif
+#define OPTIMIZE_OFF _Pragma("clang optimize off")
+#define OPTIMIZE_ON  _Pragma("clang optimize on")
+#elif defined(__GNUC__)
+#define forceinline inline __attribute__((__always_inline__))
+#define OPTIMIZE_OFF _Pragma("GCC optimize off")
+#define OPTIMIZE_ON  _Pragma("GCC optimize on")
+#else
+#define forceinline inline
+#define OPTIMIZE_OFF
+#define OPTIMIZE_ON
+#error
+#endif
+
+#endif // SUITE_DEFINITIONS_H
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/hello-world-clang.s b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/hello-world-clang.s
new file mode 100644
index 0000000..bfba259
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/hello-world-clang.s
@@ -0,0 +1,286 @@
+	.text
+	.file	"hello-world.cpp"
+	.file	1 "/data/projects/scripts/regression-suite/input/general" "hello-world.cpp"
+	.globaltype	__stack_pointer, i32
+	.functype	__original_main () -> (i32)
+	.functype	_Z6printfPKcz (i32, i32) -> (i32)
+	.functype	main (i32, i32) -> (i32)
+	.section	.text.__original_main,"",@
+	.hidden	__original_main                 # -- Begin function __original_main
+	.globl	__original_main
+	.type	__original_main,@function
+__original_main:                        # @__original_main
+.Lfunc_begin0:
+	.loc	1 4 0                           # hello-world.cpp:4:0
+	.functype	__original_main () -> (i32)
+	.local  	i32, i32, i32, i32, i32, i32, i32, i32, i32
+# %bb.0:                                # %entry
+	global.get	__stack_pointer
+	local.set	0
+	i32.const	16
+	local.set	1
+	local.get	0
+	local.get	1
+	i32.sub 
+	local.set	2
+	local.get	2
+	global.set	__stack_pointer
+	i32.const	0
+	local.set	3
+	local.get	2
+	local.get	3
+	i32.store	12
+.Ltmp0:
+	.loc	1 5 3 prologue_end              # hello-world.cpp:5:3
+	i32.const	.L.str
+	local.set	4
+	i32.const	0
+	local.set	5
+	local.get	4
+	local.get	5
+	call	_Z6printfPKcz
+	drop
+	.loc	1 6 3                           # hello-world.cpp:6:3
+	i32.const	0
+	local.set	6
+	i32.const	16
+	local.set	7
+	local.get	2
+	local.get	7
+	i32.add 
+	local.set	8
+	local.get	8
+	global.set	__stack_pointer
+	local.get	6
+	return
+	end_function
+.Ltmp1:
+.Lfunc_end0:
+                                        # -- End function
+	.section	.text.main,"",@
+	.hidden	main                            # -- Begin function main
+	.globl	main
+	.type	main,@function
+main:                                   # @main
+.Lfunc_begin1:
+	.functype	main (i32, i32) -> (i32)
+	.local  	i32
+# %bb.0:                                # %body
+	call	__original_main
+	local.set	2
+	local.get	2
+	return
+	end_function
+.Lfunc_end1:
+                                        # -- End function
+	.type	.L.str,@object                  # @.str
+	.section	.rodata..L.str,"S",@
+.L.str:
+	.asciz	"Hello, World\n"
+	.size	.L.str, 14
+
+	.globl	__main_void
+	.type	__main_void,@function
+	.hidden	__main_void
+.set __main_void, __original_main
+	.section	.debug_abbrev,"",@
+	.int8	1                               # Abbreviation Code
+	.int8	17                              # DW_TAG_compile_unit
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	37                              # DW_AT_producer
+	.int8	14                              # DW_FORM_strp
+	.int8	19                              # DW_AT_language
+	.int8	5                               # DW_FORM_data2
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	16                              # DW_AT_stmt_list
+	.int8	23                              # DW_FORM_sec_offset
+	.int8	27                              # DW_AT_comp_dir
+	.int8	14                              # DW_FORM_strp
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	2                               # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	3                               # Abbreviation Code
+	.int8	1                               # DW_TAG_array_type
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	4                               # Abbreviation Code
+	.int8	33                              # DW_TAG_subrange_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	55                              # DW_AT_count
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	5                               # Abbreviation Code
+	.int8	38                              # DW_TAG_const_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	6                               # Abbreviation Code
+	.int8	36                              # DW_TAG_base_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	62                              # DW_AT_encoding
+	.int8	11                              # DW_FORM_data1
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	7                               # Abbreviation Code
+	.int8	36                              # DW_TAG_base_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	62                              # DW_AT_encoding
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	8                               # Abbreviation Code
+	.int8	46                              # DW_TAG_subprogram
+	.int8	0                               # DW_CHILDREN_no
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	64                              # DW_AT_frame_base
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	0                               # EOM(3)
+	.section	.debug_info,"",@
+.Lcu_begin0:
+	.int32	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.int16	4                               # DWARF version number
+	.int32	.debug_abbrev0                  # Offset Into Abbrev. Section
+	.int8	4                               # Address Size (in bytes)
+	.int8	1                               # Abbrev [1] 0xb:0x67 DW_TAG_compile_unit
+	.int32	.Linfo_string0                  # DW_AT_producer
+	.int16	33                              # DW_AT_language
+	.int32	.Linfo_string1                  # DW_AT_name
+	.int32	.Lline_table_start0             # DW_AT_stmt_list
+	.int32	.Linfo_string2                  # DW_AT_comp_dir
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	2                               # Abbrev [2] 0x26:0xd DW_TAG_variable
+	.int32	51                              # DW_AT_type
+	.int8	1                               # DW_AT_decl_file
+	.int8	5                               # DW_AT_decl_line
+	.int8	5                               # DW_AT_location
+	.int8	3
+	.int32	.L.str
+	.int8	3                               # Abbrev [3] 0x33:0xc DW_TAG_array_type
+	.int32	63                              # DW_AT_type
+	.int8	4                               # Abbrev [4] 0x38:0x6 DW_TAG_subrange_type
+	.int32	75                              # DW_AT_type
+	.int8	14                              # DW_AT_count
+	.int8	0                               # End Of Children Mark
+	.int8	5                               # Abbrev [5] 0x3f:0x5 DW_TAG_const_type
+	.int32	68                              # DW_AT_type
+	.int8	6                               # Abbrev [6] 0x44:0x7 DW_TAG_base_type
+	.int32	.Linfo_string3                  # DW_AT_name
+	.int8	6                               # DW_AT_encoding
+	.int8	1                               # DW_AT_byte_size
+	.int8	7                               # Abbrev [7] 0x4b:0x7 DW_TAG_base_type
+	.int32	.Linfo_string4                  # DW_AT_name
+	.int8	8                               # DW_AT_byte_size
+	.int8	7                               # DW_AT_encoding
+	.int8	8                               # Abbrev [8] 0x52:0x18 DW_TAG_subprogram
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	4                               # DW_AT_frame_base
+	.int8	237
+	.int8	0
+	.int8	2
+	.int8	159
+	.int32	.Linfo_string5                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	3                               # DW_AT_decl_line
+	.int32	106                             # DW_AT_type
+                                        # DW_AT_external
+	.int8	6                               # Abbrev [6] 0x6a:0x7 DW_TAG_base_type
+	.int32	.Linfo_string6                  # DW_AT_name
+	.int8	5                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"S",@
+.Linfo_string0:
+	.asciz	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)" # string offset=0
+.Linfo_string1:
+	.asciz	"hello-world.cpp"               # string offset=111
+.Linfo_string2:
+	.asciz	"/data/projects/scripts/regression-suite/input/general" # string offset=127
+.Linfo_string3:
+	.asciz	"char"                          # string offset=181
+.Linfo_string4:
+	.asciz	"__ARRAY_SIZE_TYPE__"           # string offset=186
+.Linfo_string5:
+	.asciz	"main"                          # string offset=206
+.Linfo_string6:
+	.asciz	"int"                           # string offset=211
+	.ident	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.no_dead_strip	__indirect_function_table
+	.section	.custom_section.producers,"",@
+	.int8	2
+	.int8	8
+	.ascii	"language"
+	.int8	1
+	.int8	14
+	.ascii	"C_plus_plus_14"
+	.int8	0
+	.int8	12
+	.ascii	"processed-by"
+	.int8	1
+	.int8	5
+	.ascii	"clang"
+	.int8	96
+	.ascii	"19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.debug_str,"S",@
+	.section	.custom_section.target_features,"",@
+	.int8	2
+	.int8	43
+	.int8	15
+	.ascii	"mutable-globals"
+	.int8	43
+	.int8	8
+	.ascii	"sign-ext"
+	.section	.debug_str,"S",@
+	.section	.debug_line,"",@
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/hello-world.cpp b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/hello-world.cpp
new file mode 100644
index 0000000..73a8e24
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/hello-world.cpp
@@ -0,0 +1,7 @@
+extern int printf(const char * format, ... );
+
+int main()
+{
+  printf("Hello, World\n");
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-43860-clang.s b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-43860-clang.s
new file mode 100644
index 0000000..fb70b36
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-43860-clang.s
@@ -0,0 +1,457 @@
+	.text
+	.file	"pr-43860.cpp"
+	.globaltype	__stack_pointer, i32
+	.functype	_Z4testii (i32, i32) -> (i32)
+	.section	.text._Z4testii,"",@
+	.hidden	_Z4testii                       # -- Begin function _Z4testii
+	.globl	_Z4testii
+	.type	_Z4testii,@function
+_Z4testii:                              # @_Z4testii
+.Lfunc_begin0:
+	.file	1 "/data/projects/scripts/regression-suite/input/general" "pr-43860.cpp"
+	.loc	1 11 0                          # pr-43860.cpp:11:0
+	.functype	_Z4testii (i32, i32) -> (i32)
+	.local  	i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
+# %bb.0:                                # %entry
+	global.get	__stack_pointer
+	local.set	2
+	i32.const	32
+	local.set	3
+	local.get	2
+	local.get	3
+	i32.sub 
+	local.set	4
+	local.get	4
+	local.get	0
+	i32.store	16
+	local.get	4
+	local.get	1
+	i32.store	12
+.Ltmp0:
+	.loc	1 12 11 prologue_end            # pr-43860.cpp:12:11
+	local.get	4
+	i32.load	16
+	local.set	5
+	.loc	1 12 7 is_stmt 0                # pr-43860.cpp:12:7
+	local.get	4
+	local.get	5
+	i32.store	8
+	.loc	1 13 23 is_stmt 1               # pr-43860.cpp:13:23
+	local.get	4
+	i32.load	12
+	local.set	6
+	local.get	4
+	local.get	6
+	i32.store	28
+.Ltmp1:
+	.loc	1 3 15                          # pr-43860.cpp:3:15
+	local.get	4
+	i32.load	28
+	local.set	7
+	.loc	1 3 7 is_stmt 0                 # pr-43860.cpp:3:7
+	local.get	4
+	local.get	7
+	i32.store	24
+.Ltmp2:
+	.loc	1 5 17 is_stmt 1                # pr-43860.cpp:5:17
+	local.get	4
+	i32.load	28
+	local.set	8
+	.loc	1 5 25 is_stmt 0                # pr-43860.cpp:5:25
+	local.get	4
+	i32.load	24
+	local.set	9
+	.loc	1 5 23                          # pr-43860.cpp:5:23
+	local.get	8
+	local.get	9
+	i32.add 
+	local.set	10
+	.loc	1 5 9                           # pr-43860.cpp:5:9
+	local.get	4
+	local.get	10
+	i32.store	20
+	.loc	1 6 13 is_stmt 1                # pr-43860.cpp:6:13
+	local.get	4
+	i32.load	20
+	local.set	11
+	.loc	1 6 11 is_stmt 0                # pr-43860.cpp:6:11
+	local.get	4
+	local.get	11
+	i32.store	24
+.Ltmp3:
+	.loc	1 8 10 is_stmt 1                # pr-43860.cpp:8:10
+	local.get	4
+	i32.load	24
+	local.set	12
+.Ltmp4:
+	.loc	1 13 5                          # pr-43860.cpp:13:5
+	local.get	4
+	i32.load	8
+	local.set	13
+	local.get	13
+	local.get	12
+	i32.add 
+	local.set	14
+	local.get	4
+	local.get	14
+	i32.store	8
+	.loc	1 14 10                         # pr-43860.cpp:14:10
+	local.get	4
+	i32.load	8
+	local.set	15
+	.loc	1 14 3 is_stmt 0                # pr-43860.cpp:14:3
+	local.get	15
+	return
+	end_function
+.Ltmp5:
+.Lfunc_end0:
+                                        # -- End function
+	.section	.debug_abbrev,"",@
+	.int8	1                               # Abbreviation Code
+	.int8	17                              # DW_TAG_compile_unit
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	37                              # DW_AT_producer
+	.int8	14                              # DW_FORM_strp
+	.int8	19                              # DW_AT_language
+	.int8	5                               # DW_FORM_data2
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	16                              # DW_AT_stmt_list
+	.int8	23                              # DW_FORM_sec_offset
+	.int8	27                              # DW_AT_comp_dir
+	.int8	14                              # DW_FORM_strp
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	2                               # Abbreviation Code
+	.int8	46                              # DW_TAG_subprogram
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	110                             # DW_AT_linkage_name
+	.int8	14                              # DW_FORM_strp
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	32                              # DW_AT_inline
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	3                               # Abbreviation Code
+	.int8	5                               # DW_TAG_formal_parameter
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	4                               # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	5                               # Abbreviation Code
+	.int8	11                              # DW_TAG_lexical_block
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	6                               # Abbreviation Code
+	.int8	36                              # DW_TAG_base_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	62                              # DW_AT_encoding
+	.int8	11                              # DW_FORM_data1
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	7                               # Abbreviation Code
+	.int8	46                              # DW_TAG_subprogram
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	64                              # DW_AT_frame_base
+	.int8	24                              # DW_FORM_exprloc
+	.int8	110                             # DW_AT_linkage_name
+	.int8	14                              # DW_FORM_strp
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	8                               # Abbreviation Code
+	.int8	5                               # DW_TAG_formal_parameter
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	9                               # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	10                              # Abbreviation Code
+	.int8	29                              # DW_TAG_inlined_subroutine
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	49                              # DW_AT_abstract_origin
+	.int8	19                              # DW_FORM_ref4
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	88                              # DW_AT_call_file
+	.int8	11                              # DW_FORM_data1
+	.int8	89                              # DW_AT_call_line
+	.int8	11                              # DW_FORM_data1
+	.int8	87                              # DW_AT_call_column
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	11                              # Abbreviation Code
+	.int8	5                               # DW_TAG_formal_parameter
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	49                              # DW_AT_abstract_origin
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	12                              # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	49                              # DW_AT_abstract_origin
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	13                              # Abbreviation Code
+	.int8	11                              # DW_TAG_lexical_block
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	0                               # EOM(3)
+	.section	.debug_info,"",@
+.Lcu_begin0:
+	.int32	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.int16	4                               # DWARF version number
+	.int32	.debug_abbrev0                  # Offset Into Abbrev. Section
+	.int8	4                               # Address Size (in bytes)
+	.int8	1                               # Abbrev [1] 0xb:0xd1 DW_TAG_compile_unit
+	.int32	.Linfo_string0                  # DW_AT_producer
+	.int16	33                              # DW_AT_language
+	.int32	.Linfo_string1                  # DW_AT_name
+	.int32	.Lline_table_start0             # DW_AT_stmt_list
+	.int32	.Linfo_string2                  # DW_AT_comp_dir
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	2                               # Abbrev [2] 0x26:0x34 DW_TAG_subprogram
+	.int32	.Linfo_string3                  # DW_AT_linkage_name
+	.int32	.Linfo_string4                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+                                        # DW_AT_external
+	.int8	1                               # DW_AT_inline
+	.int8	3                               # Abbrev [3] 0x36:0xb DW_TAG_formal_parameter
+	.int32	.Linfo_string6                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+	.int8	4                               # Abbrev [4] 0x41:0xb DW_TAG_variable
+	.int32	.Linfo_string7                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	3                               # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+	.int8	5                               # Abbrev [5] 0x4c:0xd DW_TAG_lexical_block
+	.int8	4                               # Abbrev [4] 0x4d:0xb DW_TAG_variable
+	.int32	.Linfo_string8                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	5                               # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+	.int8	0                               # End Of Children Mark
+	.int8	0                               # End Of Children Mark
+	.int8	6                               # Abbrev [6] 0x5a:0x7 DW_TAG_base_type
+	.int32	.Linfo_string5                  # DW_AT_name
+	.int8	5                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	7                               # Abbrev [7] 0x61:0x7a DW_TAG_subprogram
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	4                               # DW_AT_frame_base
+	.int8	237
+	.int8	0
+	.int8	4
+	.int8	159
+	.int32	.Linfo_string9                  # DW_AT_linkage_name
+	.int32	.Linfo_string10                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	11                              # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+                                        # DW_AT_external
+	.int8	8                               # Abbrev [8] 0x7d:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	16
+	.int32	.Linfo_string11                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	11                              # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+	.int8	8                               # Abbrev [8] 0x8b:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	12
+	.int32	.Linfo_string12                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	11                              # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+	.int8	9                               # Abbrev [9] 0x99:0xe DW_TAG_variable
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	8
+	.int32	.Linfo_string13                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	12                              # DW_AT_decl_line
+	.int32	90                              # DW_AT_type
+	.int8	10                              # Abbrev [10] 0xa7:0x33 DW_TAG_inlined_subroutine
+	.int32	38                              # DW_AT_abstract_origin
+	.int32	.Ltmp1                          # DW_AT_low_pc
+	.int32	.Ltmp4-.Ltmp1                   # DW_AT_high_pc
+	.int8	1                               # DW_AT_call_file
+	.int8	13                              # DW_AT_call_line
+	.int8	8                               # DW_AT_call_column
+	.int8	11                              # Abbrev [11] 0xb7:0x8 DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	28
+	.int32	54                              # DW_AT_abstract_origin
+	.int8	12                              # Abbrev [12] 0xbf:0x8 DW_TAG_variable
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	24
+	.int32	65                              # DW_AT_abstract_origin
+	.int8	13                              # Abbrev [13] 0xc7:0x12 DW_TAG_lexical_block
+	.int32	.Ltmp2                          # DW_AT_low_pc
+	.int32	.Ltmp3-.Ltmp2                   # DW_AT_high_pc
+	.int8	12                              # Abbrev [12] 0xd0:0x8 DW_TAG_variable
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	20
+	.int32	77                              # DW_AT_abstract_origin
+	.int8	0                               # End Of Children Mark
+	.int8	0                               # End Of Children Mark
+	.int8	0                               # End Of Children Mark
+	.int8	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"S",@
+.Linfo_string0:
+	.asciz	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)" # string offset=0
+.Linfo_string1:
+	.asciz	"pr-43860.cpp"                  # string offset=111
+.Linfo_string2:
+	.asciz	"/data/projects/scripts/regression-suite/input/general" # string offset=124
+.Linfo_string3:
+	.asciz	"_Z14InlineFunctioni"           # string offset=178
+.Linfo_string4:
+	.asciz	"InlineFunction"                # string offset=198
+.Linfo_string5:
+	.asciz	"int"                           # string offset=213
+.Linfo_string6:
+	.asciz	"Param"                         # string offset=217
+.Linfo_string7:
+	.asciz	"Var_1"                         # string offset=223
+.Linfo_string8:
+	.asciz	"Var_2"                         # string offset=229
+.Linfo_string9:
+	.asciz	"_Z4testii"                     # string offset=235
+.Linfo_string10:
+	.asciz	"test"                          # string offset=245
+.Linfo_string11:
+	.asciz	"Param_1"                       # string offset=250
+.Linfo_string12:
+	.asciz	"Param_2"                       # string offset=258
+.Linfo_string13:
+	.asciz	"A"                             # string offset=266
+	.ident	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.custom_section.producers,"",@
+	.int8	2
+	.int8	8
+	.ascii	"language"
+	.int8	1
+	.int8	14
+	.ascii	"C_plus_plus_14"
+	.int8	0
+	.int8	12
+	.ascii	"processed-by"
+	.int8	1
+	.int8	5
+	.ascii	"clang"
+	.int8	96
+	.ascii	"19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.debug_str,"S",@
+	.section	.custom_section.target_features,"",@
+	.int8	2
+	.int8	43
+	.int8	15
+	.ascii	"mutable-globals"
+	.int8	43
+	.int8	8
+	.ascii	"sign-ext"
+	.section	.debug_str,"S",@
+	.section	.debug_line,"",@
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-43860.cpp b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-43860.cpp
new file mode 100644
index 0000000..a3d3b76
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-43860.cpp
@@ -0,0 +1,15 @@
+#include "definitions.h"
+forceinline int InlineFunction(int Param) {
+  int Var_1 = Param;
+  {
+    int Var_2 = Param + Var_1;
+    Var_1 = Var_2;
+  }
+  return Var_1;
+}
+
+int test(int Param_1, int Param_2) {
+  int A = Param_1;
+  A += InlineFunction(Param_2);
+  return A;
+}
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-44884-clang.s b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-44884-clang.s
new file mode 100644
index 0000000..af9875b
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-44884-clang.s
@@ -0,0 +1,488 @@
+	.text
+	.file	"pr-44884.cpp"
+	.globaltype	__stack_pointer, i32
+	.functype	_Z3barf (f32) -> (i32)
+	.functype	_Z3fooc (i32) -> (i32)
+	.section	.text._Z3barf,"",@
+	.hidden	_Z3barf                         # -- Begin function _Z3barf
+	.globl	_Z3barf
+	.type	_Z3barf,@function
+_Z3barf:                                # @_Z3barf
+.Lfunc_begin0:
+	.file	1 "/data/projects/scripts/regression-suite/input/general" "pr-44884.cpp"
+	.loc	1 1 0                           # pr-44884.cpp:1:0
+	.functype	_Z3barf (f32) -> (i32)
+	.local  	i32, i32, i32, f32, f32, f32, i32, i32, i32, i32, i32, i32
+# %bb.0:                                # %entry
+	global.get	__stack_pointer
+	local.set	1
+	i32.const	16
+	local.set	2
+	local.get	1
+	local.get	2
+	i32.sub 
+	local.set	3
+	local.get	3
+	local.get	0
+	f32.store	12
+.Ltmp0:
+	.loc	1 1 36 prologue_end             # pr-44884.cpp:1:36
+	local.get	3
+	f32.load	12
+	local.set	4
+	local.get	4
+	f32.abs 
+	local.set	5
+	f32.const	0x1p31
+	local.set	6
+	local.get	5
+	local.get	6
+	f32.lt  
+	local.set	7
+	local.get	7
+	i32.eqz
+	local.set	8
+	block   	
+	block   	
+	local.get	8
+	br_if   	0                               # 0: down to label1
+# %bb.1:                                # %entry
+	local.get	4
+	i32.trunc_f32_s
+	local.set	9
+	local.get	9
+	local.set	10
+	br      	1                               # 1: down to label0
+.LBB0_2:                                # %entry
+	.loc	1 0 36 is_stmt 0                # pr-44884.cpp:0:36
+	end_block                               # label1:
+	.loc	1 1 36                          # pr-44884.cpp:1:36
+	i32.const	-2147483648
+	local.set	11
+	local.get	11
+	local.set	10
+.LBB0_3:                                # %entry
+	.loc	1 0 36                          # pr-44884.cpp:0:36
+	end_block                               # label0:
+	.loc	1 1 36                          # pr-44884.cpp:1:36
+	local.get	10
+	local.set	12
+	.loc	1 1 24                          # pr-44884.cpp:1:24
+	local.get	12
+	return
+	end_function
+.Ltmp1:
+.Lfunc_end0:
+                                        # -- End function
+	.section	.text._Z3fooc,"",@
+	.hidden	_Z3fooc                         # -- Begin function _Z3fooc
+	.globl	_Z3fooc
+	.type	_Z3fooc,@function
+_Z3fooc:                                # @_Z3fooc
+.Lfunc_begin1:
+	.loc	1 3 0 is_stmt 1                 # pr-44884.cpp:3:0
+	.functype	_Z3fooc (i32) -> (i32)
+	.local  	i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, i32, i32, i32, i32, i32, i32, i32, i32, i32
+# %bb.0:                                # %entry
+	global.get	__stack_pointer
+	local.set	1
+	i32.const	16
+	local.set	2
+	local.get	1
+	local.get	2
+	i32.sub 
+	local.set	3
+	local.get	3
+	global.set	__stack_pointer
+	local.get	3
+	local.get	0
+	i32.store8	15
+.Ltmp2:
+	.loc	1 5 15 prologue_end             # pr-44884.cpp:5:15
+	local.get	3
+	i32.load8_u	15
+	local.set	4
+	i32.const	24
+	local.set	5
+	local.get	4
+	local.get	5
+	i32.shl 
+	local.set	6
+	local.get	6
+	local.get	5
+	i32.shr_s
+	local.set	7
+	.loc	1 5 7 is_stmt 0                 # pr-44884.cpp:5:7
+	local.get	3
+	local.get	7
+	i32.store	8
+.Ltmp3:
+	.loc	1 9 21 is_stmt 1                # pr-44884.cpp:9:21
+	local.get	3
+	i32.load	8
+	local.set	8
+	.loc	1 9 29 is_stmt 0                # pr-44884.cpp:9:29
+	local.get	3
+	i32.load8_u	15
+	local.set	9
+	i32.const	24
+	local.set	10
+	local.get	9
+	local.get	10
+	i32.shl 
+	local.set	11
+	local.get	11
+	local.get	10
+	i32.shr_s
+	local.set	12
+	.loc	1 9 27                          # pr-44884.cpp:9:27
+	local.get	8
+	local.get	12
+	i32.add 
+	local.set	13
+	.loc	1 9 21                          # pr-44884.cpp:9:21
+	local.get	13
+	f32.convert_i32_s
+	local.set	14
+	.loc	1 9 13                          # pr-44884.cpp:9:13
+	local.get	3
+	local.get	14
+	f32.store	4
+	.loc	1 10 19 is_stmt 1               # pr-44884.cpp:10:19
+	local.get	3
+	f32.load	4
+	local.set	15
+	.loc	1 10 15 is_stmt 0               # pr-44884.cpp:10:15
+	local.get	15
+	call	_Z3barf
+	local.set	16
+	.loc	1 10 13                         # pr-44884.cpp:10:13
+	local.get	3
+	local.get	16
+	i32.store	8
+.Ltmp4:
+	.loc	1 13 10 is_stmt 1               # pr-44884.cpp:13:10
+	local.get	3
+	i32.load	8
+	local.set	17
+	.loc	1 13 18 is_stmt 0               # pr-44884.cpp:13:18
+	local.get	3
+	i32.load8_u	15
+	local.set	18
+	i32.const	24
+	local.set	19
+	local.get	18
+	local.get	19
+	i32.shl 
+	local.set	20
+	local.get	20
+	local.get	19
+	i32.shr_s
+	local.set	21
+	.loc	1 13 16                         # pr-44884.cpp:13:16
+	local.get	17
+	local.get	21
+	i32.add 
+	local.set	22
+	.loc	1 13 3                          # pr-44884.cpp:13:3
+	i32.const	16
+	local.set	23
+	local.get	3
+	local.get	23
+	i32.add 
+	local.set	24
+	local.get	24
+	global.set	__stack_pointer
+	local.get	22
+	return
+	end_function
+.Ltmp5:
+.Lfunc_end1:
+                                        # -- End function
+	.section	.debug_abbrev,"",@
+	.int8	1                               # Abbreviation Code
+	.int8	17                              # DW_TAG_compile_unit
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	37                              # DW_AT_producer
+	.int8	14                              # DW_FORM_strp
+	.int8	19                              # DW_AT_language
+	.int8	5                               # DW_FORM_data2
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	16                              # DW_AT_stmt_list
+	.int8	23                              # DW_FORM_sec_offset
+	.int8	27                              # DW_AT_comp_dir
+	.int8	14                              # DW_FORM_strp
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	85                              # DW_AT_ranges
+	.int8	23                              # DW_FORM_sec_offset
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	2                               # Abbreviation Code
+	.int8	36                              # DW_TAG_base_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	62                              # DW_AT_encoding
+	.int8	11                              # DW_FORM_data1
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	3                               # Abbreviation Code
+	.int8	46                              # DW_TAG_subprogram
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	64                              # DW_AT_frame_base
+	.int8	24                              # DW_FORM_exprloc
+	.int8	110                             # DW_AT_linkage_name
+	.int8	14                              # DW_FORM_strp
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	4                               # Abbreviation Code
+	.int8	5                               # DW_TAG_formal_parameter
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	5                               # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	6                               # Abbreviation Code
+	.int8	11                              # DW_TAG_lexical_block
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	7                               # Abbreviation Code
+	.int8	22                              # DW_TAG_typedef
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	0                               # EOM(3)
+	.section	.debug_info,"",@
+.Lcu_begin0:
+	.int32	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.int16	4                               # DWARF version number
+	.int32	.debug_abbrev0                  # Offset Into Abbrev. Section
+	.int8	4                               # Address Size (in bytes)
+	.int8	1                               # Abbrev [1] 0xb:0xca DW_TAG_compile_unit
+	.int32	.Linfo_string0                  # DW_AT_producer
+	.int16	33                              # DW_AT_language
+	.int32	.Linfo_string1                  # DW_AT_name
+	.int32	.Lline_table_start0             # DW_AT_stmt_list
+	.int32	.Linfo_string2                  # DW_AT_comp_dir
+	.int32	0                               # DW_AT_low_pc
+	.int32	.Ldebug_ranges0                 # DW_AT_ranges
+	.int8	2                               # Abbrev [2] 0x26:0x7 DW_TAG_base_type
+	.int32	.Linfo_string3                  # DW_AT_name
+	.int8	5                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	3                               # Abbrev [3] 0x2d:0x2b DW_TAG_subprogram
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	4                               # DW_AT_frame_base
+	.int8	237
+	.int8	0
+	.int8	3
+	.int8	159
+	.int32	.Linfo_string4                  # DW_AT_linkage_name
+	.int32	.Linfo_string5                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	1                               # DW_AT_decl_line
+	.int32	38                              # DW_AT_type
+                                        # DW_AT_external
+	.int8	4                               # Abbrev [4] 0x49:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	12
+	.int32	.Linfo_string9                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	1                               # DW_AT_decl_line
+	.int32	198                             # DW_AT_type
+	.int8	0                               # End Of Children Mark
+	.int8	3                               # Abbrev [3] 0x58:0x67 DW_TAG_subprogram
+	.int32	.Lfunc_begin1                   # DW_AT_low_pc
+	.int32	.Lfunc_end1-.Lfunc_begin1       # DW_AT_high_pc
+	.int8	4                               # DW_AT_frame_base
+	.int8	237
+	.int8	0
+	.int8	3
+	.int8	159
+	.int32	.Linfo_string6                  # DW_AT_linkage_name
+	.int32	.Linfo_string7                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	3                               # DW_AT_decl_line
+	.int32	191                             # DW_AT_type
+                                        # DW_AT_external
+	.int8	4                               # Abbrev [4] 0x74:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	15
+	.int32	.Linfo_string11                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	3                               # DW_AT_decl_line
+	.int32	205                             # DW_AT_type
+	.int8	5                               # Abbrev [5] 0x82:0xe DW_TAG_variable
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	8
+	.int32	.Linfo_string13                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	5                               # DW_AT_decl_line
+	.int32	168                             # DW_AT_type
+	.int8	6                               # Abbrev [6] 0x90:0x18 DW_TAG_lexical_block
+	.int32	.Ltmp3                          # DW_AT_low_pc
+	.int32	.Ltmp4-.Ltmp3                   # DW_AT_high_pc
+	.int8	5                               # Abbrev [5] 0x99:0xe DW_TAG_variable
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	4
+	.int32	.Linfo_string15                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	9                               # DW_AT_decl_line
+	.int32	179                             # DW_AT_type
+	.int8	0                               # End Of Children Mark
+	.int8	7                               # Abbrev [7] 0xa8:0xb DW_TAG_typedef
+	.int32	38                              # DW_AT_type
+	.int32	.Linfo_string14                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	4                               # DW_AT_decl_line
+	.int8	7                               # Abbrev [7] 0xb3:0xb DW_TAG_typedef
+	.int32	198                             # DW_AT_type
+	.int32	.Linfo_string16                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	7                               # DW_AT_decl_line
+	.int8	0                               # End Of Children Mark
+	.int8	2                               # Abbrev [2] 0xbf:0x7 DW_TAG_base_type
+	.int32	.Linfo_string8                  # DW_AT_name
+	.int8	7                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	2                               # Abbrev [2] 0xc6:0x7 DW_TAG_base_type
+	.int32	.Linfo_string10                 # DW_AT_name
+	.int8	4                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	2                               # Abbrev [2] 0xcd:0x7 DW_TAG_base_type
+	.int32	.Linfo_string12                 # DW_AT_name
+	.int8	6                               # DW_AT_encoding
+	.int8	1                               # DW_AT_byte_size
+	.int8	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_ranges,"",@
+.Ldebug_ranges0:
+	.int32	.Lfunc_begin0
+	.int32	.Lfunc_end0
+	.int32	.Lfunc_begin1
+	.int32	.Lfunc_end1
+	.int32	0
+	.int32	0
+	.section	.debug_str,"S",@
+.Linfo_string0:
+	.asciz	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)" # string offset=0
+.Linfo_string1:
+	.asciz	"pr-44884.cpp"                  # string offset=111
+.Linfo_string2:
+	.asciz	"/data/projects/scripts/regression-suite/input/general" # string offset=124
+.Linfo_string3:
+	.asciz	"int"                           # string offset=178
+.Linfo_string4:
+	.asciz	"_Z3barf"                       # string offset=182
+.Linfo_string5:
+	.asciz	"bar"                           # string offset=190
+.Linfo_string6:
+	.asciz	"_Z3fooc"                       # string offset=194
+.Linfo_string7:
+	.asciz	"foo"                           # string offset=202
+.Linfo_string8:
+	.asciz	"unsigned int"                  # string offset=206
+.Linfo_string9:
+	.asciz	"Input"                         # string offset=219
+.Linfo_string10:
+	.asciz	"float"                         # string offset=225
+.Linfo_string11:
+	.asciz	"Param"                         # string offset=231
+.Linfo_string12:
+	.asciz	"char"                          # string offset=237
+.Linfo_string13:
+	.asciz	"Value"                         # string offset=242
+.Linfo_string14:
+	.asciz	"INT"                           # string offset=248
+.Linfo_string15:
+	.asciz	"Added"                         # string offset=252
+.Linfo_string16:
+	.asciz	"FLOAT"                         # string offset=258
+	.ident	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.custom_section.producers,"",@
+	.int8	2
+	.int8	8
+	.ascii	"language"
+	.int8	1
+	.int8	14
+	.ascii	"C_plus_plus_14"
+	.int8	0
+	.int8	12
+	.ascii	"processed-by"
+	.int8	1
+	.int8	5
+	.ascii	"clang"
+	.int8	96
+	.ascii	"19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.debug_str,"S",@
+	.section	.custom_section.target_features,"",@
+	.int8	2
+	.int8	43
+	.int8	15
+	.ascii	"mutable-globals"
+	.int8	43
+	.int8	8
+	.ascii	"sign-ext"
+	.section	.debug_str,"S",@
+	.section	.debug_line,"",@
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-44884.cpp b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-44884.cpp
new file mode 100644
index 0000000..4b47aae
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-44884.cpp
@@ -0,0 +1,14 @@
+int bar(float Input) { return (int)Input; }
+
+unsigned foo(char Param) {
+  typedef int INT;
+  INT Value = Param;
+  {
+    typedef float FLOAT;
+    {
+      FLOAT Added = Value + Param;
+      Value = bar(Added);
+    }
+  }
+  return Value + Param;
+}
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-46466-clang.s b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-46466-clang.s
new file mode 100644
index 0000000..1056db0
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-46466-clang.s
@@ -0,0 +1,259 @@
+	.text
+	.file	"pr-46466.cpp"
+	.file	1 "/data/projects/scripts/regression-suite/input/general" "pr-46466.cpp"
+	.functype	_Z4testv () -> (i32)
+	.section	.text._Z4testv,"",@
+	.hidden	_Z4testv                        # -- Begin function _Z4testv
+	.globl	_Z4testv
+	.type	_Z4testv,@function
+_Z4testv:                               # @_Z4testv
+.Lfunc_begin0:
+	.functype	_Z4testv () -> (i32)
+	.local  	i32
+# %bb.0:                                # %entry
+	.loc	1 10 3 prologue_end             # pr-46466.cpp:10:3
+	i32.const	1
+	local.set	0
+	local.get	0
+	return
+	end_function
+.Ltmp0:
+.Lfunc_end0:
+                                        # -- End function
+	.hidden	S                               # @S
+	.type	S,@object
+	.section	.bss.S,"",@
+	.globl	S
+S:
+	.skip	1
+	.size	S, 1
+
+	.section	.debug_abbrev,"",@
+	.int8	1                               # Abbreviation Code
+	.int8	17                              # DW_TAG_compile_unit
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	37                              # DW_AT_producer
+	.int8	14                              # DW_FORM_strp
+	.int8	19                              # DW_AT_language
+	.int8	5                               # DW_FORM_data2
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	16                              # DW_AT_stmt_list
+	.int8	23                              # DW_FORM_sec_offset
+	.int8	27                              # DW_AT_comp_dir
+	.int8	14                              # DW_FORM_strp
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	2                               # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	3                               # Abbreviation Code
+	.int8	19                              # DW_TAG_structure_type
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	54                              # DW_AT_calling_convention
+	.int8	11                              # DW_FORM_data1
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	4                               # Abbreviation Code
+	.int8	13                              # DW_TAG_member
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	56                              # DW_AT_data_member_location
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	5                               # Abbreviation Code
+	.int8	23                              # DW_TAG_union_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	54                              # DW_AT_calling_convention
+	.int8	11                              # DW_FORM_data1
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	6                               # Abbreviation Code
+	.int8	46                              # DW_TAG_subprogram
+	.int8	0                               # DW_CHILDREN_no
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	64                              # DW_AT_frame_base
+	.int8	24                              # DW_FORM_exprloc
+	.int8	110                             # DW_AT_linkage_name
+	.int8	14                              # DW_FORM_strp
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	7                               # Abbreviation Code
+	.int8	36                              # DW_TAG_base_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	62                              # DW_AT_encoding
+	.int8	11                              # DW_FORM_data1
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	0                               # EOM(3)
+	.section	.debug_info,"",@
+.Lcu_begin0:
+	.int32	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.int16	4                               # DWARF version number
+	.int32	.debug_abbrev0                  # Offset Into Abbrev. Section
+	.int8	4                               # Address Size (in bytes)
+	.int8	1                               # Abbrev [1] 0xb:0x72 DW_TAG_compile_unit
+	.int32	.Linfo_string0                  # DW_AT_producer
+	.int16	33                              # DW_AT_language
+	.int32	.Linfo_string1                  # DW_AT_name
+	.int32	.Lline_table_start0             # DW_AT_stmt_list
+	.int32	.Linfo_string2                  # DW_AT_comp_dir
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	2                               # Abbrev [2] 0x26:0x11 DW_TAG_variable
+	.int32	.Linfo_string3                  # DW_AT_name
+	.int32	55                              # DW_AT_type
+                                        # DW_AT_external
+	.int8	1                               # DW_AT_decl_file
+	.int8	8                               # DW_AT_decl_line
+	.int8	5                               # DW_AT_location
+	.int8	3
+	.int32	S
+	.int8	3                               # Abbrev [3] 0x37:0x1f DW_TAG_structure_type
+	.int8	5                               # DW_AT_calling_convention
+	.int32	.Linfo_string6                  # DW_AT_name
+	.int8	1                               # DW_AT_byte_size
+	.int8	1                               # DW_AT_decl_file
+	.int8	1                               # DW_AT_decl_line
+	.int8	4                               # Abbrev [4] 0x40:0xc DW_TAG_member
+	.int32	.Linfo_string4                  # DW_AT_name
+	.int32	76                              # DW_AT_type
+	.int8	1                               # DW_AT_decl_file
+	.int8	5                               # DW_AT_decl_line
+	.int8	0                               # DW_AT_data_member_location
+	.int8	5                               # Abbrev [5] 0x4c:0x9 DW_TAG_union_type
+	.int8	5                               # DW_AT_calling_convention
+	.int32	.Linfo_string5                  # DW_AT_name
+	.int8	1                               # DW_AT_byte_size
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int8	0                               # End Of Children Mark
+	.int8	6                               # Abbrev [6] 0x56:0x1f DW_TAG_subprogram
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	7                               # DW_AT_frame_base
+	.int8	237
+	.int8	3
+	.int32	__stack_pointer
+	.int8	159
+	.int32	.Linfo_string7                  # DW_AT_linkage_name
+	.int32	.Linfo_string8                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	9                               # DW_AT_decl_line
+	.int32	117                             # DW_AT_type
+                                        # DW_AT_external
+	.int8	7                               # Abbrev [7] 0x75:0x7 DW_TAG_base_type
+	.int32	.Linfo_string9                  # DW_AT_name
+	.int8	5                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"S",@
+.Linfo_string0:
+	.asciz	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)" # string offset=0
+.Linfo_string1:
+	.asciz	"pr-46466.cpp"                  # string offset=111
+.Linfo_string2:
+	.asciz	"/data/projects/scripts/regression-suite/input/general" # string offset=124
+.Linfo_string3:
+	.asciz	"S"                             # string offset=178
+.Linfo_string4:
+	.asciz	"U"                             # string offset=180
+.Linfo_string5:
+	.asciz	"Union"                         # string offset=182
+.Linfo_string6:
+	.asciz	"Struct"                        # string offset=188
+.Linfo_string7:
+	.asciz	"_Z4testv"                      # string offset=195
+.Linfo_string8:
+	.asciz	"test"                          # string offset=204
+.Linfo_string9:
+	.asciz	"int"                           # string offset=209
+	.ident	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.custom_section.producers,"",@
+	.int8	2
+	.int8	8
+	.ascii	"language"
+	.int8	1
+	.int8	14
+	.ascii	"C_plus_plus_14"
+	.int8	0
+	.int8	12
+	.ascii	"processed-by"
+	.int8	1
+	.int8	5
+	.ascii	"clang"
+	.int8	96
+	.ascii	"19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.debug_str,"S",@
+	.section	.custom_section.target_features,"",@
+	.int8	2
+	.int8	43
+	.int8	15
+	.ascii	"mutable-globals"
+	.int8	43
+	.int8	8
+	.ascii	"sign-ext"
+	.section	.debug_str,"S",@
+	.section	.debug_line,"",@
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-46466.cpp b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-46466.cpp
new file mode 100644
index 0000000..28be9a5
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/pr-46466.cpp
@@ -0,0 +1,11 @@
+struct Struct {
+  union Union {
+    enum NestedEnum { RED, BLUE };
+  };
+  Union U;
+};
+
+Struct S;
+int test() {
+  return S.U.BLUE;
+}
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang.s b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang.s
new file mode 100644
index 0000000..02afaf7
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang.s
@@ -0,0 +1,366 @@
+	.text
+	.file	"test.cpp"
+	.globaltype	__stack_pointer, i32
+	.functype	_Z3fooPKijb (i32, i32, i32) -> (i32)
+	.section	.text._Z3fooPKijb,"",@
+	.hidden	_Z3fooPKijb                     # -- Begin function _Z3fooPKijb
+	.globl	_Z3fooPKijb
+	.type	_Z3fooPKijb,@function
+_Z3fooPKijb:                            # @_Z3fooPKijb
+.Lfunc_begin0:
+	.file	1 "/data/projects/scripts/regression-suite/input/general" "test.cpp"
+	.loc	1 2 0                           # test.cpp:2:0
+	.functype	_Z3fooPKijb (i32, i32, i32) -> (i32)
+	.local  	i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
+# %bb.0:                                # %entry
+	global.get	__stack_pointer
+	local.set	3
+	i32.const	32
+	local.set	4
+	local.get	3
+	local.get	4
+	i32.sub 
+	local.set	5
+	local.get	5
+	local.get	0
+	i32.store	24
+	local.get	5
+	local.get	1
+	i32.store	20
+	local.get	2
+	local.set	6
+	local.get	5
+	local.get	6
+	i32.store8	19
+.Ltmp0:
+	.loc	1 3 7 prologue_end              # test.cpp:3:7
+	local.get	5
+	i32.load8_u	19
+	local.set	7
+.Ltmp1:
+	.loc	1 3 7 is_stmt 0                 # test.cpp:3:7
+	i32.const	1
+	local.set	8
+	local.get	7
+	local.get	8
+	i32.and 
+	local.set	9
+	block   	
+	block   	
+	local.get	9
+	i32.eqz
+	br_if   	0                               # 0: down to label1
+# %bb.1:                                # %if.then
+.Ltmp2:
+	.loc	1 5 19 is_stmt 1                # test.cpp:5:19
+	i32.const	7
+	local.set	10
+	local.get	5
+	local.get	10
+	i32.store	12
+	.loc	1 6 5                           # test.cpp:6:5
+	i32.const	7
+	local.set	11
+	local.get	5
+	local.get	11
+	i32.store	28
+	br      	1                               # 1: down to label0
+.Ltmp3:
+.LBB0_2:                                # %if.end
+	.loc	1 0 5 is_stmt 0                 # test.cpp:0:5
+	end_block                               # label1:
+	.loc	1 8 10 is_stmt 1                # test.cpp:8:10
+	local.get	5
+	i32.load	20
+	local.set	12
+	.loc	1 8 3 is_stmt 0                 # test.cpp:8:3
+	local.get	5
+	local.get	12
+	i32.store	28
+.LBB0_3:                                # %return
+	.loc	1 0 3                           # test.cpp:0:3
+	end_block                               # label0:
+	.loc	1 9 1 is_stmt 1                 # test.cpp:9:1
+	local.get	5
+	i32.load	28
+	local.set	13
+	local.get	13
+	return
+	end_function
+.Ltmp4:
+.Lfunc_end0:
+                                        # -- End function
+	.section	.debug_abbrev,"",@
+	.int8	1                               # Abbreviation Code
+	.int8	17                              # DW_TAG_compile_unit
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	37                              # DW_AT_producer
+	.int8	14                              # DW_FORM_strp
+	.int8	19                              # DW_AT_language
+	.int8	5                               # DW_FORM_data2
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	16                              # DW_AT_stmt_list
+	.int8	23                              # DW_FORM_sec_offset
+	.int8	27                              # DW_AT_comp_dir
+	.int8	14                              # DW_FORM_strp
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	2                               # Abbreviation Code
+	.int8	46                              # DW_TAG_subprogram
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	64                              # DW_AT_frame_base
+	.int8	24                              # DW_FORM_exprloc
+	.int8	110                             # DW_AT_linkage_name
+	.int8	14                              # DW_FORM_strp
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	63                              # DW_AT_external
+	.int8	25                              # DW_FORM_flag_present
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	3                               # Abbreviation Code
+	.int8	5                               # DW_TAG_formal_parameter
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	4                               # Abbreviation Code
+	.int8	11                              # DW_TAG_lexical_block
+	.int8	1                               # DW_CHILDREN_yes
+	.int8	17                              # DW_AT_low_pc
+	.int8	1                               # DW_FORM_addr
+	.int8	18                              # DW_AT_high_pc
+	.int8	6                               # DW_FORM_data4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	5                               # Abbreviation Code
+	.int8	52                              # DW_TAG_variable
+	.int8	0                               # DW_CHILDREN_no
+	.int8	2                               # DW_AT_location
+	.int8	24                              # DW_FORM_exprloc
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	6                               # Abbreviation Code
+	.int8	22                              # DW_TAG_typedef
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	58                              # DW_AT_decl_file
+	.int8	11                              # DW_FORM_data1
+	.int8	59                              # DW_AT_decl_line
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	7                               # Abbreviation Code
+	.int8	36                              # DW_TAG_base_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	3                               # DW_AT_name
+	.int8	14                              # DW_FORM_strp
+	.int8	62                              # DW_AT_encoding
+	.int8	11                              # DW_FORM_data1
+	.int8	11                              # DW_AT_byte_size
+	.int8	11                              # DW_FORM_data1
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	8                               # Abbreviation Code
+	.int8	15                              # DW_TAG_pointer_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	9                               # Abbreviation Code
+	.int8	38                              # DW_TAG_const_type
+	.int8	0                               # DW_CHILDREN_no
+	.int8	73                              # DW_AT_type
+	.int8	19                              # DW_FORM_ref4
+	.int8	0                               # EOM(1)
+	.int8	0                               # EOM(2)
+	.int8	0                               # EOM(3)
+	.section	.debug_info,"",@
+.Lcu_begin0:
+	.int32	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.int16	4                               # DWARF version number
+	.int32	.debug_abbrev0                  # Offset Into Abbrev. Section
+	.int8	4                               # Address Size (in bytes)
+	.int8	1                               # Abbrev [1] 0xb:0xb5 DW_TAG_compile_unit
+	.int32	.Linfo_string0                  # DW_AT_producer
+	.int16	33                              # DW_AT_language
+	.int32	.Linfo_string1                  # DW_AT_name
+	.int32	.Lline_table_start0             # DW_AT_stmt_list
+	.int32	.Linfo_string2                  # DW_AT_comp_dir
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	2                               # Abbrev [2] 0x26:0x6a DW_TAG_subprogram
+	.int32	.Lfunc_begin0                   # DW_AT_low_pc
+	.int32	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.int8	4                               # DW_AT_frame_base
+	.int8	237
+	.int8	0
+	.int8	5
+	.int8	159
+	.int32	.Linfo_string3                  # DW_AT_linkage_name
+	.int32	.Linfo_string4                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int32	144                             # DW_AT_type
+                                        # DW_AT_external
+	.int8	3                               # Abbrev [3] 0x42:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	24
+	.int32	.Linfo_string6                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int32	151                             # DW_AT_type
+	.int8	3                               # Abbrev [3] 0x50:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	20
+	.int32	.Linfo_string8                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int32	172                             # DW_AT_type
+	.int8	3                               # Abbrev [3] 0x5e:0xe DW_TAG_formal_parameter
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	19
+	.int32	.Linfo_string10                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	2                               # DW_AT_decl_line
+	.int32	179                             # DW_AT_type
+	.int8	4                               # Abbrev [4] 0x6c:0x18 DW_TAG_lexical_block
+	.int32	.Ltmp2                          # DW_AT_low_pc
+	.int32	.Ltmp3-.Ltmp2                   # DW_AT_high_pc
+	.int8	5                               # Abbrev [5] 0x75:0xe DW_TAG_variable
+	.int8	2                               # DW_AT_location
+	.int8	145
+	.int8	12
+	.int32	.Linfo_string12                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	5                               # DW_AT_decl_line
+	.int32	186                             # DW_AT_type
+	.int8	0                               # End Of Children Mark
+	.int8	6                               # Abbrev [6] 0x84:0xb DW_TAG_typedef
+	.int32	144                             # DW_AT_type
+	.int32	.Linfo_string13                 # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	4                               # DW_AT_decl_line
+	.int8	0                               # End Of Children Mark
+	.int8	7                               # Abbrev [7] 0x90:0x7 DW_TAG_base_type
+	.int32	.Linfo_string5                  # DW_AT_name
+	.int8	5                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	6                               # Abbrev [6] 0x97:0xb DW_TAG_typedef
+	.int32	162                             # DW_AT_type
+	.int32	.Linfo_string7                  # DW_AT_name
+	.int8	1                               # DW_AT_decl_file
+	.int8	1                               # DW_AT_decl_line
+	.int8	8                               # Abbrev [8] 0xa2:0x5 DW_TAG_pointer_type
+	.int32	167                             # DW_AT_type
+	.int8	9                               # Abbrev [9] 0xa7:0x5 DW_TAG_const_type
+	.int32	144                             # DW_AT_type
+	.int8	7                               # Abbrev [7] 0xac:0x7 DW_TAG_base_type
+	.int32	.Linfo_string9                  # DW_AT_name
+	.int8	7                               # DW_AT_encoding
+	.int8	4                               # DW_AT_byte_size
+	.int8	7                               # Abbrev [7] 0xb3:0x7 DW_TAG_base_type
+	.int32	.Linfo_string11                 # DW_AT_name
+	.int8	2                               # DW_AT_encoding
+	.int8	1                               # DW_AT_byte_size
+	.int8	9                               # Abbrev [9] 0xba:0x5 DW_TAG_const_type
+	.int32	132                             # DW_AT_type
+	.int8	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"S",@
+.Linfo_string0:
+	.asciz	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)" # string offset=0
+.Linfo_string1:
+	.asciz	"test.cpp"                      # string offset=111
+.Linfo_string2:
+	.asciz	"/data/projects/scripts/regression-suite/input/general" # string offset=120
+.Linfo_string3:
+	.asciz	"_Z3fooPKijb"                   # string offset=174
+.Linfo_string4:
+	.asciz	"foo"                           # string offset=186
+.Linfo_string5:
+	.asciz	"int"                           # string offset=190
+.Linfo_string6:
+	.asciz	"ParamPtr"                      # string offset=194
+.Linfo_string7:
+	.asciz	"INTPTR"                        # string offset=203
+.Linfo_string8:
+	.asciz	"ParamUnsigned"                 # string offset=210
+.Linfo_string9:
+	.asciz	"unsigned int"                  # string offset=224
+.Linfo_string10:
+	.asciz	"ParamBool"                     # string offset=237
+.Linfo_string11:
+	.asciz	"bool"                          # string offset=247
+.Linfo_string12:
+	.asciz	"CONSTANT"                      # string offset=252
+.Linfo_string13:
+	.asciz	"INTEGER"                       # string offset=261
+	.ident	"clang version 19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.custom_section.producers,"",@
+	.int8	2
+	.int8	8
+	.ascii	"language"
+	.int8	1
+	.int8	14
+	.ascii	"C_plus_plus_14"
+	.int8	0
+	.int8	12
+	.ascii	"processed-by"
+	.int8	1
+	.int8	5
+	.ascii	"clang"
+	.int8	96
+	.ascii	"19.0.0git (/data/projects/llvm-root/llvm-project/clang 2db6703f0c257d293df455e2dff8c1fb695c4100)"
+	.section	.debug_str,"S",@
+	.section	.custom_section.target_features,"",@
+	.int8	2
+	.int8	43
+	.int8	15
+	.ascii	"mutable-globals"
+	.int8	43
+	.int8	8
+	.ascii	"sign-ext"
+	.section	.debug_str,"S",@
+	.section	.debug_line,"",@
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test.cpp b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test.cpp
new file mode 100644
index 0000000..5cf39f4
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test.cpp
@@ -0,0 +1,9 @@
+using INTPTR = const int *;
+int foo(INTPTR ParamPtr, unsigned ParamUnsigned, bool ParamBool) {
+  if (ParamBool) {
+    typedef int INTEGER;
+    const INTEGER CONSTANT = 7;
+    return CONSTANT;
+  }
+  return ParamUnsigned;
+}
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/README.txt b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/README.txt
new file mode 100644
index 0000000..6937bb0
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/README.txt
@@ -0,0 +1,28 @@
+Notes:
+------
+As we should avoid committing binaries (.wasm) to be used in tests,
+instead we provide the '.cpp' source files and the '.s' files.
+
+- For the tests, only the '.s' files are required.
+- We use the target 'wasm32' as the 'wasm64' is not standardized yet.
+
+How to generate .s from .cpp
+----------------------------
+Use clang to generate the '.s'.
+
+  clang --target=wasm32 -S -g Inputs/hello-world.cpp -o Inputs/hello-world-clang.s
+  clang --target=wasm32 -S -g Inputs/pr-43860.cpp    -o Inputs/pr-43860-clang.s
+  clang --target=wasm32 -S -g Inputs/pr-44884.cpp    -o Inputs/pr-44884-clang.s
+  clang --target=wasm32 -S -g Inputs/pr-46466.cpp    -o Inputs/pr-46466-clang.s
+  clang --target=wasm32 -S -g Inputs/test.cpp        -o Inputs/test-clang.s
+
+How to generate .o from .s
+--------------------------------
+Each test executes one of the following commands in order to generate
+the binary '.wasm' used by that specific test:
+
+  llvm-mc -arch=wasm32 -filetype=obj %p/Inputs/hello-world-clang.s -o hello-world-clang.o
+  llvm-mc -arch=wasm32 -filetype=obj %p/Inputs/pr-43860-clang.s    -o pr-43860-clang.o
+  llvm-mc -arch=wasm32 -filetype=obj %p/Inputs/pr-44884-clang.s    -o pr-44884-clang.o
+  llvm-mc -arch=wasm32 -filetype=obj %p/Inputs/pr-46466-clang.s    -o pr-46466-clang.o
+  llvm-mc -arch=wasm32 -filetype=obj %p/Inputs/test-clang.s        -o test-clang.o
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s
index 892a5d1..03f7de2 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s
@@ -180,10 +180,10 @@ jr a0
 # CHECK-NEXT:  1      3     0.50                        sext.b	a0, a0
 # CHECK-NEXT:  1      3     0.50                        sext.h	a0, a0
 # CHECK-NEXT:  1      3     0.50                        zext.h	a0, a0
-# CHECK-NEXT:  1      3     0.50                        min	a0, a0, a0
-# CHECK-NEXT:  1      3     0.50                        minu	a0, a0, a0
-# CHECK-NEXT:  1      3     0.50                        max	a0, a0, a0
-# CHECK-NEXT:  1      3     0.50                        maxu	a0, a0, a0
+# CHECK-NEXT:  1      3     1.00                        min	a0, a0, a0
+# CHECK-NEXT:  1      3     1.00                        minu	a0, a0, a0
+# CHECK-NEXT:  1      3     1.00                        max	a0, a0, a0
+# CHECK-NEXT:  1      3     1.00                        maxu	a0, a0, a0
 # CHECK-NEXT:  1      3     1.00                        rol	a0, a0, a0
 # CHECK-NEXT:  1      3     1.00                        ror	a0, a0, a0
 # CHECK-NEXT:  1      3     1.00                        rori	a0, a0, 1
@@ -225,7 +225,7 @@ jr a0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -     39.00  52.00   -      -      -      -
+# CHECK-NEXT:  -      -     37.00  54.00   -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -289,9 +289,9 @@ jr a0
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     sext.h	a0, a0
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     zext.h	a0, a0
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     min	a0, a0, a0
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     minu	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     minu	a0, a0, a0
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     max	a0, a0, a0
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     maxu	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     maxu	a0, a0, a0
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     rol	a0, a0, a0
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     ror	a0, a0, a0
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     rori	a0, a0, 1
diff --git a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe
new file mode 100644
index 0000000..309476a
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe
diff --git a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript
new file mode 100644
index 0000000..96eb878c
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript
@@ -0,0 +1,13 @@
+PERF_RECORD_MMAP2 5752/0: [0x7ff70a1b0000(0x640000) @ 0x1000 00:00 0 0]: r-xp c:\Users\haohaiwe\Desktop\coff-profile.exe
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
+ 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 
diff --git a/llvm/test/tools/llvm-profgen/coff-profile.test b/llvm/test/tools/llvm-profgen/coff-profile.test
new file mode 100644
index 0000000..5578f73
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/coff-profile.test
@@ -0,0 +1,79 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/coff-profile.perfscript --binary=%S/Inputs/coff-profile.exe --output=%t
+; RUN: FileCheck %s --input-file %t
+
+CHECK:      main:31837:0
+CHECK-NEXT:  0: 0
+CHECK-NEXT:  3.1: 0
+CHECK-NEXT:  3.2: 0
+CHECK-NEXT:  8: 0
+CHECK-NEXT:  65501: 0
+CHECK-NEXT:  1: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0
+CHECK-NEXT:   1: 0
+CHECK-NEXT:   1.1: 0
+CHECK-NEXT:   1.2: 0
+CHECK-NEXT:   2: 0
+CHECK-NEXT:   65514: 0
+CHECK-NEXT:  4: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:3193
+CHECK-NEXT:   0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:3193
+CHECK-NEXT:    1.1: 31
+CHECK-NEXT:    1.2: 31
+CHECK-NEXT:    2: 31
+CHECK-NEXT:    3: 31
+CHECK-NEXT:    65530: 0
+CHECK-NEXT:  5: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:28644
+CHECK-NEXT:   0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:28644
+CHECK-NEXT:    1.1: 341
+CHECK-NEXT:    1.2: 341
+CHECK-NEXT:    2: 341
+CHECK-NEXT:    3: 341
+CHECK-NEXT:    65530: 0
+CHECK-NEXT:  7: ?print@MyNameSpace2@@YAXPEAGH@Z:0
+CHECK-NEXT:   1: 0
+
+; Original code
+; clang-cl.exe -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld -Xclang -fdebug-info-for-profiling -link -debug:dwarf
+
+#include <stdio.h>
+
+namespace MyNameSpace1 {
+
+template <typename T1, typename T2> class MyClass {
+  void work(T1 map[], T2 n, T2 m) {
+    for (int i = 1; i < n; i++) {
+      map[i] = map[i - 1] * map[i - 1];
+      map[i] += (i * map[i - 1]) / m + i % m;
+    }
+  }
+
+public:
+  void work1(T1 map[], T2 n) { work(map, n, 7); }
+  void work2(T1 map[], T2 n) { work(map, n, 3); }
+};
+
+} // namespace MyNameSpace1
+
+namespace MyNameSpace2 {
+
+template <typename T1, typename T2> void init(T1 c, T2 *p) {
+  for (int i = 0; i < c * 1000000; i++) {
+    p[i] = i / 3 + (i * i) % 3;
+  }
+}
+
+void print(unsigned short *p, int i) {
+  printf("%d %d %d\n", p[i * i * 100], p[i * i * 100 + 1], p[i * i * 100 + 2]);
+}
+
+} // namespace MyNameSpace2
+
+unsigned short M[3000000];
+int main(int argc, char *argv[]) {
+  MyNameSpace2::init(argc, M);
+  MyNameSpace1::MyClass<unsigned short, int> Obj;
+  for (int i = 0; i <= argc * 10; i++) {
+    Obj.work1(&M[argc], argc * 100000);
+    Obj.work2(&M[argc * argc], argc * 1000000);
+  }
+  MyNameSpace2::print(M, argc);
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-readobj/ELF/machine-specific-section-types.test b/llvm/test/tools/llvm-readobj/ELF/machine-specific-section-types.test
index f952438..8ad50c5 100644
--- a/llvm/test/tools/llvm-readobj/ELF/machine-specific-section-types.test
+++ b/llvm/test/tools/llvm-readobj/ELF/machine-specific-section-types.test
@@ -17,6 +17,10 @@
 # RUN: llvm-readobj --section-headers %t-aarch64.o | FileCheck %s --check-prefix=AARCH64-LLVM
 # RUN: llvm-readelf --section-headers %t-aarch64.o | FileCheck %s --check-prefix=AARCH64-GNU
 
+# RUN: yaml2obj %s --docnum=5 -o %t-hexagon.o
+# RUN: llvm-readobj --section-headers %t-hexagon.o | FileCheck %s --check-prefix=HEXAGON-LLVM
+# RUN: llvm-readelf --section-headers %t-hexagon.o | FileCheck %s --check-prefix=HEXAGON-GNU
+
 # ARM-LLVM: Name: exidx
 # ARM-LLVM: Type: SHT_ARM_EXIDX
 # ARM-LLVM: Name: preemptmap
@@ -64,6 +68,11 @@
 # AARCH64-GNU: .memtag.globals.dynamic AARCH64_MEMTAG_GLOBALS_DYNAMIC
 # AARCH64-GNU: .memtag.globals.static  AARCH64_MEMTAG_GLOBALS_STATIC
 
+# HEXAGON-LLVM: Name: hexagon_ordered
+# HEXAGON-LLVM: Type: SHT_HEX_ORDERED
+
+# HEXAGON-GNU: hexagon_ordered HEX_ORDERED
+
 --- !ELF
 FileHeader:
   Class:   ELFCLASS64
@@ -122,3 +131,13 @@ Sections:
     Type:  SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC
   - Name:  .memtag.globals.static
     Type:  SHT_AARCH64_MEMTAG_GLOBALS_STATIC
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS32
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_HEXAGON
+Sections:
+  - Name: hexagon_ordered
+    Type: SHT_HEX_ORDERED
diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp
index 1c869e1..fd85256 100644
--- a/llvm/tools/llvm-as/llvm-as.cpp
+++ b/llvm/tools/llvm-as/llvm-as.cpp
@@ -67,6 +67,7 @@ static cl::opt<std::string> ClDataLayout("data-layout",
                                          cl::desc("data layout string to use"),
                                          cl::value_desc("layout-string"),
                                          cl::init(""), cl::cat(AsCat));
+extern bool WriteNewDbgInfoFormatToBitcode;
 
 static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
   // Infer the output filename if needed.
@@ -139,6 +140,12 @@ int main(int argc, char **argv) {
     Err.print(argv[0], errs());
     return 1;
   }
+
+  // Convert to new debug format if requested.
+  assert(!M->IsNewDbgInfoFormat && "Unexpectedly in new debug mode");
+  if (UseNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode)
+    M->convertToNewDbgValues();
+
   std::unique_ptr<ModuleSummaryIndex> Index = std::move(ModuleAndIndex.Index);
 
   if (!DisableVerify) {
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index c6fcf7e..8781476 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -408,9 +408,22 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary,
           PerfContent::UnknownContent};
 }
 
+static StringRef filename(StringRef Path, bool UseBackSlash) {
+  llvm::sys::path::Style PathStyle =
+      UseBackSlash ? llvm::sys::path::Style::windows_backslash
+                   : llvm::sys::path::Style::native;
+  StringRef FileName = llvm::sys::path::filename(Path, PathStyle);
+
+  // In case this file use \r\n as newline.
+  if (UseBackSlash && FileName.back() == '\r')
+    return FileName.drop_back();
+
+  return FileName;
+}
+
 void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
   // Drop the event which doesn't belong to user-provided binary
-  StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath);
+  StringRef BinaryName = filename(Event.BinaryPath, Binary->isCOFF());
   if (Binary->getName() != BinaryName)
     return;
 
@@ -975,7 +988,7 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
            << format("0x%" PRIx64 ":", MMap.Address) << " \n";
   }
 
-  StringRef BinaryName = llvm::sys::path::filename(MMap.BinaryPath);
+  StringRef BinaryName = filename(MMap.BinaryPath, Binary->isCOFF());
   return Binary->getName() == BinaryName;
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index f622286..1baf358 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/COFF.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
@@ -211,10 +212,11 @@ void ProfiledBinary::load() {
   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
   Binary &ExeBinary = *OBinary.getBinary();
 
-  auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
-  if (!Obj)
-    exitWithError("not a valid Elf image", Path);
+  IsCOFF = isa<COFFObjectFile>(&ExeBinary);
+  if (!isa<ELFObjectFileBase>(&ExeBinary) && !IsCOFF)
+    exitWithError("not a valid ELF/COFF image", Path);
 
+  auto *Obj = cast<ObjectFile>(&ExeBinary);
   TheTriple = Obj->makeTriple();
 
   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
@@ -236,13 +238,14 @@ void ProfiledBinary::load() {
   DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
                                 DisassembleFunctions.end());
 
-  checkPseudoProbe(Obj);
+  if (auto *ELFObj = dyn_cast<ELFObjectFileBase>(Obj)) {
+    checkPseudoProbe(ELFObj);
+    if (UsePseudoProbes)
+      populateElfSymbolAddressList(ELFObj);
 
-  if (UsePseudoProbes)
-    populateElfSymbolAddressList(Obj);
-
-  if (ShowDisassemblyOnly)
-    decodePseudoProbe(Obj);
+    if (ShowDisassemblyOnly)
+      decodePseudoProbe(ELFObj);
+  }
 
   // Disassemble the text sections.
   disassemble(Obj);
@@ -335,18 +338,35 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
     exitWithError("no executable segment found", FileName);
 }
 
-void ProfiledBinary::setPreferredTextSegmentAddresses(
-    const ELFObjectFileBase *Obj) {
+void ProfiledBinary::setPreferredTextSegmentAddresses(const COFFObjectFile *Obj,
+                                                      StringRef FileName) {
+  uint64_t ImageBase = Obj->getImageBase();
+  if (!ImageBase)
+    exitWithError("Not a COFF image", FileName);
+
+  PreferredTextSegmentAddresses.push_back(ImageBase);
+  FirstLoadableAddress = ImageBase;
+
+  for (SectionRef Section : Obj->sections()) {
+    const coff_section *Sec = Obj->getCOFFSection(Section);
+    if (Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+      TextSegmentOffsets.push_back(Sec->VirtualAddress);
+  }
+}
+
+void ProfiledBinary::setPreferredTextSegmentAddresses(const ObjectFile *Obj) {
   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
-  else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
+  else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
+  else if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj))
+    setPreferredTextSegmentAddresses(COFFObj, Obj->getFileName());
   else
-    llvm_unreachable("invalid ELF object format");
+    llvm_unreachable("invalid object format");
 }
 
 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
@@ -442,7 +462,7 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
 void ProfiledBinary::decodePseudoProbe() {
   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
   Binary &ExeBinary = *OBinary.getBinary();
-  auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
+  auto *Obj = cast<ELFObjectFileBase>(&ExeBinary);
   decodePseudoProbe(Obj);
 }
 
@@ -593,7 +613,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
   return true;
 }
 
-void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
+void ProfiledBinary::setUpDisassembler(const ObjectFile *Obj) {
   const Target *TheTarget = getTarget(Obj);
   std::string TripleName = TheTriple.getTriple();
   StringRef FileName = Obj->getFileName();
@@ -635,7 +655,7 @@ void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
   IPrinter->setPrintBranchImmAsAddress(true);
 }
 
-void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
+void ProfiledBinary::disassemble(const ObjectFile *Obj) {
   // Set up disassembler and related components.
   setUpDisassembler(Obj);
 
@@ -687,7 +707,7 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
              << "]:\n\n";
     }
 
-    if (SectionName == ".plt")
+    if (isa<ELFObjectFileBase>(Obj) && SectionName == ".plt")
       continue;
 
     // Get the section data.
@@ -722,8 +742,7 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
 }
 
 void ProfiledBinary::checkUseFSDiscriminator(
-    const ELFObjectFileBase *Obj,
-    std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
+    const ObjectFile *Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
   const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
        SI != SE; ++SI) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 0fd12f5..5d2088a 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -297,22 +297,26 @@ class ProfiledBinary {
   // Use to avoid redundant warning.
   bool MissingMMapWarned = false;
 
-  void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
+  bool IsCOFF = false;
+
+  void setPreferredTextSegmentAddresses(const ObjectFile *O);
 
   template <class ELFT>
   void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
                                         StringRef FileName);
+  void setPreferredTextSegmentAddresses(const COFFObjectFile *Obj,
+                                        StringRef FileName);
 
   void checkPseudoProbe(const ELFObjectFileBase *Obj);
 
   void decodePseudoProbe(const ELFObjectFileBase *Obj);
 
   void
-  checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
+  checkUseFSDiscriminator(const ObjectFile *Obj,
                           std::map<SectionRef, SectionSymbolsTy> &AllSymbols);
 
   // Set up disassembler and related components.
-  void setUpDisassembler(const ELFObjectFileBase *Obj);
+  void setUpDisassembler(const ObjectFile *Obj);
   symbolize::LLVMSymbolizer::Options getSymbolizerOpts() const;
 
   // Load debug info of subprograms from DWARF section.
@@ -333,7 +337,7 @@ class ProfiledBinary {
   void warnNoFuncEntry();
 
   /// Dissassemble the text section and build various address maps.
-  void disassemble(const ELFObjectFileBase *O);
+  void disassemble(const ObjectFile *O);
 
   /// Helper function to dissassemble the symbol and extract info for unwinding
   bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
@@ -362,6 +366,8 @@ public:
   uint64_t getBaseAddress() const { return BaseAddress; }
   void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
 
+  bool isCOFF() const { return IsCOFF; }
+
   // Canonicalize to use preferred load address as base address.
   uint64_t canonicalizeVirtualAddress(uint64_t Address) {
     return Address - BaseAddress + getPreferredBaseAddress();
diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt
index eba1672..9adce06 100644
--- a/llvm/tools/llvm-shlib/CMakeLists.txt
+++ b/llvm/tools/llvm-shlib/CMakeLists.txt
@@ -35,8 +35,7 @@ if(LLVM_BUILD_LLVM_DYLIB)
   endif()
   add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB OUTPUT_NAME LLVM ${INSTALL_WITH_TOOLCHAIN} ${SOURCES})
   # Add symlink for backwards compatibility with old library name
-  get_target_property(LLVM_DYLIB_SOVERSION LLVM SOVERSION)
-  llvm_install_library_symlink(LLVM-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} LLVM SHARED COMPONENT LLVM SOVERSION ${LLVM_DYLIB_SOVERSION})
+  llvm_install_library_symlink(LLVM-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} $<TARGET_SONAME_FILE_NAME:LLVM> SHARED FULL_DEST COMPONENT LLVM)
 
   list(REMOVE_DUPLICATES LIB_NAMES)
   if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
diff --git a/llvm/tools/verify-uselistorder/verify-uselistorder.cpp b/llvm/tools/verify-uselistorder/verify-uselistorder.cpp
index 9afe681..d929ae0 100644
--- a/llvm/tools/verify-uselistorder/verify-uselistorder.cpp
+++ b/llvm/tools/verify-uselistorder/verify-uselistorder.cpp
@@ -166,6 +166,11 @@ std::unique_ptr<Module> TempFile::readBitcode(LLVMContext &Context) const {
                           "verify-uselistorder: error: ");
     return nullptr;
   }
+
+  // verify-uselistoder currently only supports old-style debug info mode.
+  // FIXME: Update mapping code for RemoveDIs.
+  assert(!ModuleOr.get()->IsNewDbgInfoFormat &&
+         "Unexpectedly in new debug info mode");
   return std::move(ModuleOr.get());
 }
 
@@ -175,6 +180,9 @@ std::unique_ptr<Module> TempFile::readAssembly(LLVMContext &Context) const {
   std::unique_ptr<Module> M = parseAssemblyFile(Filename, Err, Context);
   if (!M.get())
     Err.print("verify-uselistorder", errs());
+  // verify-uselistoder currently only supports old-style debug info mode.
+  // FIXME: Update mapping code for RemoveDIs.
+  assert(!M->IsNewDbgInfoFormat && "Unexpectedly in new debug info mode");
   return M;
 }
 
@@ -541,6 +549,9 @@ int main(int argc, char **argv) {
 
   // Load the input module...
   std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
+  // verify-uselistoder currently only supports old-style debug info mode.
+  // FIXME: Update mapping code for RemoveDIs.
+  assert(!M->IsNewDbgInfoFormat && "Unexpectedly in new debug info mode");
 
   if (!M.get()) {
     Err.print(argv[0], errs());
diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index 11237d2..83cbb02 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -14,6 +14,8 @@
 #include "llvm/Support/Alignment.h"
 #include "gtest/gtest.h"
 #include <array>
+#include <climits>
+#include <limits>
 #include <optional>
 
 using namespace llvm;
@@ -2911,6 +2913,91 @@ TEST(APIntTest, RoundingSDiv) {
   }
 }
 
+TEST(APIntTest, Average) {
+  APInt A0(32, 0);
+  APInt A2(32, 2);
+  APInt A100(32, 100);
+  APInt A101(32, 101);
+  APInt A200(32, 200, false);
+  APInt ApUMax = APInt::getMaxValue(32);
+
+  EXPECT_EQ(APInt(32, 150), APIntOps::avgFloorU(A100, A200));
+  EXPECT_EQ(APIntOps::RoundingUDiv(A100 + A200, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorU(A100, A200));
+  EXPECT_EQ(APIntOps::RoundingUDiv(A100 + A200, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilU(A100, A200));
+  EXPECT_EQ(APIntOps::RoundingUDiv(A100 + A101, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorU(A100, A101));
+  EXPECT_EQ(APIntOps::RoundingUDiv(A100 + A101, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilU(A100, A101));
+  EXPECT_EQ(A0, APIntOps::avgFloorU(A0, A0));
+  EXPECT_EQ(A0, APIntOps::avgCeilU(A0, A0));
+  EXPECT_EQ(ApUMax, APIntOps::avgFloorU(ApUMax, ApUMax));
+  EXPECT_EQ(ApUMax, APIntOps::avgCeilU(ApUMax, ApUMax));
+  EXPECT_EQ(APIntOps::RoundingUDiv(ApUMax, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorU(A0, ApUMax));
+  EXPECT_EQ(APIntOps::RoundingUDiv(ApUMax, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilU(A0, ApUMax));
+
+  APInt Ap100(32, +100);
+  APInt Ap101(32, +101);
+  APInt Ap200(32, +200);
+  APInt Am1(32, -1);
+  APInt Am100(32, -100);
+  APInt Am101(32, -101);
+  APInt Am200(32, -200);
+  APInt AmSMin = APInt::getSignedMinValue(32);
+  APInt ApSMax = APInt::getSignedMaxValue(32);
+
+  EXPECT_EQ(APInt(32, +150), APIntOps::avgFloorS(Ap100, Ap200));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Ap100 + Ap200, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorS(Ap100, Ap200));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Ap100 + Ap200, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilS(Ap100, Ap200));
+
+  EXPECT_EQ(APInt(32, -150), APIntOps::avgFloorS(Am100, Am200));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Am100 + Am200, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorS(Am100, Am200));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Am100 + Am200, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilS(Am100, Am200));
+
+  EXPECT_EQ(APInt(32, +100), APIntOps::avgFloorS(Ap100, Ap101));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Ap100 + Ap101, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorS(Ap100, Ap101));
+  EXPECT_EQ(APInt(32, +101), APIntOps::avgCeilS(Ap100, Ap101));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Ap100 + Ap101, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilS(Ap100, Ap101));
+
+  EXPECT_EQ(APInt(32, -101), APIntOps::avgFloorS(Am100, Am101));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Am100 + Am101, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorS(Am100, Am101));
+  EXPECT_EQ(APInt(32, -100), APIntOps::avgCeilS(Am100, Am101));
+  EXPECT_EQ(APIntOps::RoundingSDiv(Am100 + Am101, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilS(Am100, Am101));
+
+  EXPECT_EQ(AmSMin, APIntOps::avgFloorS(AmSMin, AmSMin));
+  EXPECT_EQ(AmSMin, APIntOps::avgCeilS(AmSMin, AmSMin));
+
+  EXPECT_EQ(APIntOps::RoundingSDiv(AmSMin, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorS(A0, AmSMin));
+  EXPECT_EQ(APIntOps::RoundingSDiv(AmSMin, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilS(A0, AmSMin));
+
+  EXPECT_EQ(A0, APIntOps::avgFloorS(A0, A0));
+  EXPECT_EQ(A0, APIntOps::avgCeilS(A0, A0));
+
+  EXPECT_EQ(Am1, APIntOps::avgFloorS(AmSMin, ApSMax));
+  EXPECT_EQ(A0, APIntOps::avgCeilS(AmSMin, ApSMax));
+
+  EXPECT_EQ(APIntOps::RoundingSDiv(ApSMax, A2, APInt::Rounding::DOWN),
+            APIntOps::avgFloorS(A0, ApSMax));
+  EXPECT_EQ(APIntOps::RoundingSDiv(ApSMax, A2, APInt::Rounding::UP),
+            APIntOps::avgCeilS(A0, ApSMax));
+
+  EXPECT_EQ(ApSMax, APIntOps::avgFloorS(ApSMax, ApSMax));
+  EXPECT_EQ(ApSMax, APIntOps::avgCeilS(ApSMax, ApSMax));
+}
+
 TEST(APIntTest, umul_ov) {
   const std::pair<uint64_t, uint64_t> Overflows[] = {
       {0x8000000000000000, 2},
@@ -2962,6 +3049,69 @@ TEST(APIntTest, smul_ov) {
       }
 }
 
+TEST(APIntTest, sfloordiv_ov) {
+  // int16 test overflow
+  {
+    using IntTy = int16_t;
+    APInt divisor(8 * sizeof(IntTy), std::numeric_limits<IntTy>::lowest(),
+                  true);
+    APInt dividend(8 * sizeof(IntTy), IntTy(-1), true);
+    bool Overflow = false;
+    (void)divisor.sfloordiv_ov(dividend, Overflow);
+    EXPECT_TRUE(Overflow);
+  }
+  // int32 test overflow
+  {
+    using IntTy = int32_t;
+    APInt divisor(8 * sizeof(IntTy), std::numeric_limits<IntTy>::lowest(),
+                  true);
+    APInt dividend(8 * sizeof(IntTy), IntTy(-1), true);
+    bool Overflow = false;
+    (void)divisor.sfloordiv_ov(dividend, Overflow);
+    EXPECT_TRUE(Overflow);
+  }
+  // int64 test overflow
+  {
+    using IntTy = int64_t;
+    APInt divisor(8 * sizeof(IntTy), std::numeric_limits<IntTy>::lowest(),
+                  true);
+    APInt dividend(8 * sizeof(IntTy), IntTy(-1), true);
+    bool Overflow = false;
+    (void)divisor.sfloordiv_ov(dividend, Overflow);
+    EXPECT_TRUE(Overflow);
+  }
+  // test all of int8
+  {
+    bool Overflow = false;
+    for (int i = -128; i < 128; ++i) {
+      for (int j = -128; j < 128; ++j) {
+        if (j == 0)
+          continue;
+
+        int8_t a = static_cast<int8_t>(i);
+        int8_t b = static_cast<int8_t>(j);
+
+        APInt divisor(8, a, true);
+        APInt dividend(8, b, true);
+        APInt quotient = divisor.sfloordiv_ov(dividend, Overflow);
+
+        if (i == -128 && j == -1) {
+          EXPECT_TRUE(Overflow);
+          continue;
+        }
+
+        if (((i >= 0 && j > 0) || (i <= 0 && j < 0)) ||
+            (i % j == 0)) // if quotient >= 0 and remain == 0 floordiv
+                          // equivalent to div
+          EXPECT_EQ(quotient.getSExtValue(), a / b);
+        else
+          EXPECT_EQ(quotient.getSExtValue(), a / b - 1);
+        EXPECT_FALSE(Overflow);
+      }
+    }
+  }
+}
+
 TEST(APIntTest, SolveQuadraticEquationWrap) {
   // Verify that "Solution" is the first non-negative integer that solves
   // Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution
diff --git a/llvm/unittests/CodeGen/LowLevelTypeTest.cpp b/llvm/unittests/CodeGen/LowLevelTypeTest.cpp
index cb34802..b60d82b 100644
--- a/llvm/unittests/CodeGen/LowLevelTypeTest.cpp
+++ b/llvm/unittests/CodeGen/LowLevelTypeTest.cpp
@@ -18,6 +18,24 @@ using namespace llvm;
 
 namespace {
 
+TEST(LowLevelTypeTest, Token) {
+  LLVMContext C;
+  DataLayout DL("");
+
+  const LLT TTy = LLT::token();
+
+  // Test kind.
+  EXPECT_TRUE(TTy.isValid());
+  EXPECT_TRUE(TTy.isScalar());
+  EXPECT_TRUE(TTy.isToken());
+
+  EXPECT_FALSE(TTy.isPointer());
+  EXPECT_FALSE(TTy.isVector());
+
+  const LLT STy = LLT::scalar(0);
+  EXPECT_EQ(STy, TTy);
+}
+
 TEST(LowLevelTypeTest, Scalar) {
   LLVMContext C;
   DataLayout DL("");
@@ -32,6 +50,8 @@ TEST(LowLevelTypeTest, Scalar) {
     ASSERT_FALSE(Ty.isPointer());
     ASSERT_FALSE(Ty.isVector());
 
+    EXPECT_TRUE(S != 0 || Ty.isToken());
+
     // Test sizes.
     EXPECT_EQ(S, Ty.getSizeInBits());
     EXPECT_EQ(S, Ty.getScalarSizeInBits());
@@ -77,6 +97,7 @@ TEST(LowLevelTypeTest, Vector) {
 
       ASSERT_FALSE(VTy.isScalar());
       ASSERT_FALSE(VTy.isPointer());
+      ASSERT_FALSE(VTy.isToken());
 
       // Test sizes.
       EXPECT_EQ(S, VTy.getScalarSizeInBits());
@@ -300,6 +321,7 @@ TEST(LowLevelTypeTest, Invalid) {
   ASSERT_FALSE(Ty.isScalar());
   ASSERT_FALSE(Ty.isPointer());
   ASSERT_FALSE(Ty.isVector());
+  ASSERT_FALSE(Ty.isToken());
 }
 
 TEST(LowLevelTypeTest, Divide) {
diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
index 2b764c9..180d430 100644
--- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
+++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
@@ -133,6 +133,10 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
   SDValue And = DAG->getNode(ISD::AND, DL, Int32VT, Op0, Op1);
   SDValue Xor = DAG->getNode(ISD::XOR, DL, Int32VT, Op1, Op0);
   SDValue Or  = DAG->getNode(ISD::OR, DL, Int32VT, Op0, Op1);
+  SDValue SMax = DAG->getNode(ISD::SMAX, DL, Int32VT, Op0, Op1);
+  SDValue SMin = DAG->getNode(ISD::SMIN, DL, Int32VT, Op1, Op0);
+  SDValue UMax = DAG->getNode(ISD::UMAX, DL, Int32VT, Op0, Op1);
+  SDValue UMin = DAG->getNode(ISD::UMIN, DL, Int32VT, Op1, Op0);
 
   SDValue SFAdd = DAG->getNode(ISD::STRICT_FADD, DL, {Float32VT, MVT::Other},
                                {DAG->getEntryNode(), Op2, Op2});
@@ -155,6 +159,15 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
   EXPECT_TRUE(sd_match(Or, m_c_BinOp(ISD::OR, m_Value(), m_Value())));
   EXPECT_TRUE(sd_match(Or, m_Or(m_Value(), m_Value())));
 
+  EXPECT_TRUE(sd_match(SMax, m_c_BinOp(ISD::SMAX, m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(SMax, m_SMax(m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(SMin, m_c_BinOp(ISD::SMIN, m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(SMin, m_SMin(m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(UMax, m_c_BinOp(ISD::UMAX, m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(UMax, m_UMax(m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(UMin, m_c_BinOp(ISD::UMIN, m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(UMin, m_UMin(m_Value(), m_Value())));
+
   SDValue BindVal;
   EXPECT_TRUE(sd_match(SFAdd, m_ChainedBinOp(ISD::STRICT_FADD, m_Value(BindVal),
                                              m_Deferred(BindVal))));
diff --git a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt
index f102ba5..8a6a26b 100644
--- a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_unittest(OrcJITTests
   JITTargetMachineBuilderTest.cpp
   LazyCallThroughAndReexportsTest.cpp
   LookupAndRecordAddrsTest.cpp
+  MachOPlatformTest.cpp
   MapperJITLinkMemoryManagerTest.cpp
   MemoryMapperTest.cpp
   ObjectFormatsTest.cpp
diff --git a/llvm/unittests/ExecutionEngine/Orc/MachOPlatformTest.cpp b/llvm/unittests/ExecutionEngine/Orc/MachOPlatformTest.cpp
new file mode 100644
index 0000000..bf6c104
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/Orc/MachOPlatformTest.cpp
@@ -0,0 +1,56 @@
+//===---------- MachOPlatformTest.cpp - MachPlatform API Tests ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::orc;
+
+TEST(MachOPlatformTests, BuildVersionOptsFromTriple) {
+
+  auto darwinOS = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-darwin"), 0, 0);
+  EXPECT_FALSE(darwinOS);
+
+  auto macOS = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-macosx"), 0, 0);
+  EXPECT_TRUE(macOS);
+  EXPECT_EQ(macOS->Platform, MachO::PLATFORM_MACOS);
+
+  auto iOS = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-ios"), 0, 0);
+  EXPECT_TRUE(iOS);
+  EXPECT_EQ(iOS->Platform, MachO::PLATFORM_IOS);
+
+  auto iOSSim = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-ios-simulator"), 0, 0);
+  EXPECT_TRUE(iOSSim);
+  EXPECT_EQ(iOSSim->Platform, MachO::PLATFORM_IOSSIMULATOR);
+
+  auto tvOS = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-tvos"), 0, 0);
+  EXPECT_TRUE(tvOS);
+  EXPECT_EQ(tvOS->Platform, MachO::PLATFORM_TVOS);
+
+  auto tvOSSim = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-tvos-simulator"), 0, 0);
+  EXPECT_TRUE(tvOSSim);
+  EXPECT_EQ(tvOSSim->Platform, MachO::PLATFORM_TVOSSIMULATOR);
+
+  auto watchOS = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-watchos"), 0, 0);
+  EXPECT_TRUE(watchOS);
+  EXPECT_EQ(watchOS->Platform, MachO::PLATFORM_WATCHOS);
+
+  auto watchOSSim = MachOPlatform::HeaderOptions::BuildVersionOpts::fromTriple(
+      Triple("arm64-apple-watchos-simulator"), 0, 0);
+  EXPECT_TRUE(watchOSSim);
+  EXPECT_EQ(watchOSSim->Platform, MachO::PLATFORM_WATCHOSSIMULATOR);
+}
diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp
index 4bd11d2..55944b8 100644
--- a/llvm/unittests/IR/DebugInfoTest.cpp
+++ b/llvm/unittests/IR/DebugInfoTest.cpp
@@ -968,8 +968,7 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
   ItCount = 0;
   // Check these things store the same information; but that they're not the same
   // objects.
-  for (DPValue &Item :
-       DPValue::filter(RetInst->DbgMarker->getDbgRecordRange())) {
+  for (DPValue &Item : filterDbgVars(RetInst->DbgMarker->getDbgRecordRange())) {
     EXPECT_TRUE((Item.getRawLocation() == DPV2->getRawLocation() && ItCount == 0) ||
                 (Item.getRawLocation() == DPV1->getRawLocation() && ItCount == 1));
 
diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp
index 139e883..77bd935 100644
--- a/llvm/unittests/IR/IRBuilderTest.cpp
+++ b/llvm/unittests/IR/IRBuilderTest.cpp
@@ -57,7 +57,7 @@ TEST_F(IRBuilderTest, Intrinsics) {
   IRBuilder<> Builder(BB);
   Value *V;
   Instruction *I;
-  CallInst *Call;
+  Value *Result;
   IntrinsicInst *II;
 
   V = Builder.CreateLoad(GV->getValueType(), GV);
@@ -65,78 +65,80 @@ TEST_F(IRBuilderTest, Intrinsics) {
   I->setHasNoInfs(true);
   I->setHasNoNaNs(false);
 
-  Call = Builder.CreateMinNum(V, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateMinNum(V, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::minnum);
 
-  Call = Builder.CreateMaxNum(V, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateMaxNum(V, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::maxnum);
 
-  Call = Builder.CreateMinimum(V, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateMinimum(V, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::minimum);
 
-  Call = Builder.CreateMaximum(V, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateMaximum(V, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::maximum);
 
-  Call = Builder.CreateIntrinsic(Intrinsic::readcyclecounter, {}, {});
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateIntrinsic(Intrinsic::readcyclecounter, {}, {});
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::readcyclecounter);
 
-  Call = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::fabs);
   EXPECT_FALSE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, V, I);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, V, I);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::fabs);
   EXPECT_TRUE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateBinaryIntrinsic(Intrinsic::pow, V, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateBinaryIntrinsic(Intrinsic::pow, V, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::pow);
   EXPECT_FALSE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateBinaryIntrinsic(Intrinsic::pow, V, V, I);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateBinaryIntrinsic(Intrinsic::pow, V, V, I);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::pow);
   EXPECT_TRUE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateIntrinsic(Intrinsic::fma, {V->getType()}, {V, V, V});
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateIntrinsic(Intrinsic::fma, {V->getType()}, {V, V, V});
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::fma);
   EXPECT_FALSE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateIntrinsic(Intrinsic::fma, {V->getType()}, {V, V, V}, I);
-  II = cast<IntrinsicInst>(Call);
+  Result =
+      Builder.CreateIntrinsic(Intrinsic::fma, {V->getType()}, {V, V, V}, I);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::fma);
   EXPECT_TRUE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateIntrinsic(Intrinsic::fma, {V->getType()}, {V, V, V}, I);
-  II = cast<IntrinsicInst>(Call);
+  Result =
+      Builder.CreateIntrinsic(Intrinsic::fma, {V->getType()}, {V, V, V}, I);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::fma);
   EXPECT_TRUE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateUnaryIntrinsic(Intrinsic::roundeven, V);
-  II = cast<IntrinsicInst>(Call);
+  Result = Builder.CreateUnaryIntrinsic(Intrinsic::roundeven, V);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::roundeven);
   EXPECT_FALSE(II->hasNoInfs());
   EXPECT_FALSE(II->hasNoNaNs());
 
-  Call = Builder.CreateIntrinsic(
+  Result = Builder.CreateIntrinsic(
       Intrinsic::set_rounding, {},
       {Builder.getInt32(static_cast<uint32_t>(RoundingMode::TowardZero))});
-  II = cast<IntrinsicInst>(Call);
+  II = cast<IntrinsicInst>(Result);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::set_rounding);
 }
 
diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp
index 767dd1a..ac27869 100644
--- a/llvm/unittests/IR/MetadataTest.cpp
+++ b/llvm/unittests/IR/MetadataTest.cpp
@@ -3560,6 +3560,27 @@ TEST_F(DIExpressionTest, foldConstant) {
 #undef EXPECT_FOLD_CONST
 }
 
+TEST_F(DIExpressionTest, appendToStackAssert) {
+  DIExpression *Expr = DIExpression::get(Context, {});
+
+  // Verify that the DW_OP_LLVM_convert operands, which have the same values as
+  // DW_OP_stack_value and DW_OP_LLVM_fragment, do not get interpreted as such
+  // operations. This previously triggered an assert.
+  uint64_t FromSize = dwarf::DW_OP_stack_value;
+  uint64_t ToSize = dwarf::DW_OP_LLVM_fragment;
+  uint64_t Ops[] = {
+      dwarf::DW_OP_LLVM_convert, FromSize, dwarf::DW_ATE_signed,
+      dwarf::DW_OP_LLVM_convert, ToSize,   dwarf::DW_ATE_signed,
+  };
+  Expr = DIExpression::appendToStack(Expr, Ops);
+
+  uint64_t Expected[] = {
+      dwarf::DW_OP_LLVM_convert, FromSize, dwarf::DW_ATE_signed,
+      dwarf::DW_OP_LLVM_convert, ToSize,   dwarf::DW_ATE_signed,
+      dwarf::DW_OP_stack_value};
+  EXPECT_EQ(Expr->getElements(), ArrayRef<uint64_t>(Expected));
+}
+
 typedef MetadataTest DIObjCPropertyTest;
 
 TEST_F(DIObjCPropertyTest, get) {
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp
index d3177ce..b867b6c 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -362,39 +362,14 @@ TEST(KnownBitsTest, BinaryExhaustive) {
         return Known1 ^ Known2;
       },
       [](const APInt &N1, const APInt &N2) { return N1 ^ N2; });
-
-  testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::umax(Known1, Known2);
-      },
-      [](const APInt &N1, const APInt &N2) { return APIntOps::umax(N1, N2); });
-  testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::umin(Known1, Known2);
-      },
-      [](const APInt &N1, const APInt &N2) { return APIntOps::umin(N1, N2); });
-  testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::smax(Known1, Known2);
-      },
-      [](const APInt &N1, const APInt &N2) { return APIntOps::smax(N1, N2); });
-  testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::smin(Known1, Known2);
-      },
-      [](const APInt &N1, const APInt &N2) { return APIntOps::smin(N1, N2); });
-  testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::abdu(Known1, Known2);
-      },
-      [](const APInt &N1, const APInt &N2) { return APIntOps::abdu(N1, N2); },
-      checkCorrectnessOnlyBinary);
-  testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::abds(Known1, Known2);
-      },
-      [](const APInt &N1, const APInt &N2) { return APIntOps::abds(N1, N2); },
-      checkCorrectnessOnlyBinary);
+  testBinaryOpExhaustive(KnownBits::umax, APIntOps::umax);
+  testBinaryOpExhaustive(KnownBits::umin, APIntOps::umin);
+  testBinaryOpExhaustive(KnownBits::smax, APIntOps::smax);
+  testBinaryOpExhaustive(KnownBits::smin, APIntOps::smin);
+  testBinaryOpExhaustive(KnownBits::abdu, APIntOps::abdu,
+                         checkCorrectnessOnlyBinary);
+  testBinaryOpExhaustive(KnownBits::abds, APIntOps::abds,
+                         checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
         return KnownBits::udiv(Known1, Known2);
@@ -457,33 +432,25 @@ TEST(KnownBitsTest, BinaryExhaustive) {
       },
       checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::sadd_sat(Known1, Known2);
-      },
+      KnownBits::sadd_sat,
       [](const APInt &N1, const APInt &N2) -> std::optional<APInt> {
         return N1.sadd_sat(N2);
       },
       checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::uadd_sat(Known1, Known2);
-      },
+      KnownBits::uadd_sat,
       [](const APInt &N1, const APInt &N2) -> std::optional<APInt> {
         return N1.uadd_sat(N2);
       },
       checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::ssub_sat(Known1, Known2);
-      },
+      KnownBits::ssub_sat,
       [](const APInt &N1, const APInt &N2) -> std::optional<APInt> {
         return N1.ssub_sat(N2);
       },
       checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
-      [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::usub_sat(Known1, Known2);
-      },
+      KnownBits::usub_sat,
       [](const APInt &N1, const APInt &N2) -> std::optional<APInt> {
         return N1.usub_sat(N2);
       },
@@ -582,7 +549,6 @@ TEST(KnownBitsTest, BinaryExhaustive) {
         return N1.ashr(N2);
       },
       checkOptimalityBinary, /* RefinePoisonToZero */ true);
-
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
         return KnownBits::mul(Known1, Known2);
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index d4abbb4..695b093 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
@@ -3330,3 +3332,66 @@ TEST(RedirectingFileSystemTest, Used) {
   EXPECT_TRUE(Redirecting1->hasBeenUsed());
   EXPECT_FALSE(Redirecting2->hasBeenUsed());
 }
+
+// Check that paths looked up in the external filesystem are unmodified, except
+// potentially to add the working directory. We cannot canonicalize away ..
+// in the presence of symlinks in the external filesystem.
+TEST(RedirectingFileSystemTest, ExternalPaths) {
+  struct InterceptorFS : llvm::vfs::ProxyFileSystem {
+    std::vector<std::string> SeenPaths;
+
+    InterceptorFS(IntrusiveRefCntPtr<FileSystem> UnderlyingFS)
+        : ProxyFileSystem(UnderlyingFS) {}
+
+    llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
+      SeenPaths.push_back(Path.str());
+      return ProxyFileSystem::status(Path);
+    }
+
+    llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+    openFileForRead(const Twine &Path) override {
+      SeenPaths.push_back(Path.str());
+      return ProxyFileSystem::openFileForRead(Path);
+    }
+
+    std::error_code isLocal(const Twine &Path, bool &Result) override {
+      SeenPaths.push_back(Path.str());
+      return ProxyFileSystem::isLocal(Path, Result);
+    }
+
+    vfs::directory_iterator dir_begin(const Twine &Dir,
+                                      std::error_code &EC) override {
+      SeenPaths.push_back(Dir.str());
+      return ProxyFileSystem::dir_begin(Dir, EC);
+    }
+  };
+
+  std::error_code EC;
+  auto BaseFS = makeIntrusiveRefCnt<DummyFileSystem>();
+  BaseFS->setCurrentWorkingDirectory("/cwd");
+  auto CheckFS = makeIntrusiveRefCnt<InterceptorFS>(BaseFS);
+  auto FS = vfs::RedirectingFileSystem::create({}, /*UseExternalNames=*/false,
+                                               *CheckFS);
+
+  FS->status("/a/../b");
+  FS->openFileForRead("c");
+  FS->exists("./d");
+  bool IsLocal = false;
+  FS->isLocal("/e/./../f", IsLocal);
+  FS->dir_begin(".././g", EC);
+
+  std::vector<std::string> Expected{"/a/../b", "/cwd/c", "/cwd/./d",
+                                    "/e/./../f", "/cwd/.././g"};
+
+  EXPECT_EQ(CheckFS->SeenPaths, Expected);
+
+  CheckFS->SeenPaths.clear();
+  FS->setRedirection(vfs::RedirectingFileSystem::RedirectKind::Fallback);
+  FS->status("/a/../b");
+  FS->openFileForRead("c");
+  FS->exists("./d");
+  FS->isLocal("/e/./../f", IsLocal);
+  FS->dir_begin(".././g", EC);
+
+  EXPECT_EQ(CheckFS->SeenPaths, Expected);
+}
diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
index 7f494e5..91c3b0b 100644
--- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
+++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
@@ -40,12 +40,10 @@
 
 #include "CodeGenTarget.h"
 #include "PredicateExpander.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
-#include <set>
 #include <vector>
 
 using namespace llvm;
@@ -61,14 +59,14 @@ class MacroFusionPredicatorEmitter {
                            raw_ostream &OS);
   void emitMacroFusionImpl(std::vector<Record *> Fusions, PredicateExpander &PE,
                            raw_ostream &OS);
-  void emitPredicates(std::vector<Record *> &FirstPredicate,
+  void emitPredicates(std::vector<Record *> &FirstPredicate, bool IsCommutable,
                       PredicateExpander &PE, raw_ostream &OS);
-  void emitFirstPredicate(Record *SecondPredicate, PredicateExpander &PE,
-                          raw_ostream &OS);
-  void emitSecondPredicate(Record *SecondPredicate, PredicateExpander &PE,
-                           raw_ostream &OS);
-  void emitBothPredicate(Record *Predicates, PredicateExpander &PE,
-                         raw_ostream &OS);
+  void emitFirstPredicate(Record *SecondPredicate, bool IsCommutable,
+                          PredicateExpander &PE, raw_ostream &OS);
+  void emitSecondPredicate(Record *SecondPredicate, bool IsCommutable,
+                           PredicateExpander &PE, raw_ostream &OS);
+  void emitBothPredicate(Record *Predicates, bool IsCommutable,
+                         PredicateExpander &PE, raw_ostream &OS);
 
 public:
   MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {}
@@ -103,6 +101,7 @@ void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
   for (Record *Fusion : Fusions) {
     std::vector<Record *> Predicates =
         Fusion->getValueAsListOfDefs("Predicates");
+    bool IsCommutable = Fusion->getValueAsBit("IsCommutable");
 
     OS << "bool is" << Fusion->getName() << "(\n";
     OS.indent(4) << "const TargetInstrInfo &TII,\n";
@@ -111,7 +110,7 @@ void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
     OS.indent(4) << "const MachineInstr &SecondMI) {\n";
     OS.indent(2) << "auto &MRI = SecondMI.getMF()->getRegInfo();\n";
 
-    emitPredicates(Predicates, PE, OS);
+    emitPredicates(Predicates, IsCommutable, PE, OS);
 
     OS.indent(2) << "return true;\n";
     OS << "}\n";
@@ -122,15 +121,16 @@ void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
 }
 
 void MacroFusionPredicatorEmitter::emitPredicates(
-    std::vector<Record *> &Predicates, PredicateExpander &PE, raw_ostream &OS) {
+    std::vector<Record *> &Predicates, bool IsCommutable, PredicateExpander &PE,
+    raw_ostream &OS) {
   for (Record *Predicate : Predicates) {
     Record *Target = Predicate->getValueAsDef("Target");
     if (Target->getName() == "first_fusion_target")
-      emitFirstPredicate(Predicate, PE, OS);
+      emitFirstPredicate(Predicate, IsCommutable, PE, OS);
     else if (Target->getName() == "second_fusion_target")
-      emitSecondPredicate(Predicate, PE, OS);
+      emitSecondPredicate(Predicate, IsCommutable, PE, OS);
     else if (Target->getName() == "both_fusion_target")
-      emitBothPredicate(Predicate, PE, OS);
+      emitBothPredicate(Predicate, IsCommutable, PE, OS);
     else
       PrintFatalError(Target->getLoc(),
                       "Unsupported 'FusionTarget': " + Target->getName());
@@ -138,6 +138,7 @@ void MacroFusionPredicatorEmitter::emitPredicates(
 }
 
 void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
+                                                      bool IsCommutable,
                                                       PredicateExpander &PE,
                                                       raw_ostream &OS) {
   if (Predicate->isSubClassOf("WildcardPred")) {
@@ -170,6 +171,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
 }
 
 void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
+                                                       bool IsCommutable,
                                                        PredicateExpander &PE,
                                                        raw_ostream &OS) {
   if (Predicate->isSubClassOf("FusionPredicateWithMCInstPredicate")) {
@@ -182,6 +184,36 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
     OS << ")\n";
     OS.indent(4) << "  return false;\n";
     OS.indent(2) << "}\n";
+  } else if (Predicate->isSubClassOf("SameReg")) {
+    int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx");
+    int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx");
+
+    OS.indent(2) << "if (!SecondMI.getOperand(" << FirstOpIdx
+                 << ").getReg().isVirtual()) {\n";
+    OS.indent(4) << "if (SecondMI.getOperand(" << FirstOpIdx
+                 << ").getReg() != SecondMI.getOperand(" << SecondOpIdx
+                 << ").getReg())";
+
+    if (IsCommutable) {
+      OS << " {\n";
+      OS.indent(6) << "if (!SecondMI.getDesc().isCommutable())\n";
+      OS.indent(6) << "  return false;\n";
+
+      OS.indent(6)
+          << "unsigned SrcOpIdx1 = " << SecondOpIdx
+          << ", SrcOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;\n";
+      OS.indent(6)
+          << "if (TII.findCommutedOpIndices(SecondMI, SrcOpIdx1, SrcOpIdx2))\n";
+      OS.indent(6)
+          << "  if (SecondMI.getOperand(" << FirstOpIdx
+          << ").getReg() != SecondMI.getOperand(SrcOpIdx2).getReg())\n";
+      OS.indent(6) << "    return false;\n";
+      OS.indent(4) << "}\n";
+    } else {
+      OS << "\n";
+      OS.indent(4) << "  return false;\n";
+    }
+    OS.indent(2) << "}\n";
   } else {
     PrintFatalError(Predicate->getLoc(),
                     "Unsupported predicate for second instruction: " +
@@ -190,13 +222,14 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
 }
 
 void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate,
+                                                     bool IsCommutable,
                                                      PredicateExpander &PE,
                                                      raw_ostream &OS) {
   if (Predicate->isSubClassOf("FusionPredicateWithCode"))
     OS << Predicate->getValueAsString("Predicate");
   else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) {
-    emitFirstPredicate(Predicate, PE, OS);
-    emitSecondPredicate(Predicate, PE, OS);
+    emitFirstPredicate(Predicate, IsCommutable, PE, OS);
+    emitSecondPredicate(Predicate, IsCommutable, PE, OS);
   } else if (Predicate->isSubClassOf("TieReg")) {
     int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx");
     int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx");
@@ -206,8 +239,28 @@ void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate,
                  << ").isReg() &&\n";
     OS.indent(2) << "      FirstMI->getOperand(" << FirstOpIdx
                  << ").getReg() == SecondMI.getOperand(" << SecondOpIdx
-                 << ").getReg()))\n";
-    OS.indent(2) << "  return false;\n";
+                 << ").getReg()))";
+
+    if (IsCommutable) {
+      OS << " {\n";
+      OS.indent(4) << "if (!SecondMI.getDesc().isCommutable())\n";
+      OS.indent(4) << "  return false;\n";
+
+      OS.indent(4)
+          << "unsigned SrcOpIdx1 = " << SecondOpIdx
+          << ", SrcOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;\n";
+      OS.indent(4)
+          << "if (TII.findCommutedOpIndices(SecondMI, SrcOpIdx1, SrcOpIdx2))\n";
+      OS.indent(4)
+          << "  if (FirstMI->getOperand(" << FirstOpIdx
+          << ").getReg() != SecondMI.getOperand(SrcOpIdx2).getReg())\n";
+      OS.indent(4) << "    return false;\n";
+      OS.indent(2) << "}";
+    } else {
+      OS << "\n";
+      OS.indent(2) << "  return false;";
+    }
+    OS << "\n";
   } else
     PrintFatalError(Predicate->getLoc(),
                     "Unsupported predicate for both instruction: " +
diff --git a/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn
index 39c3efa..7c9edfb 100644
--- a/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Host/BUILD.gn
@@ -16,6 +16,7 @@ static_library("Host") {
   ]
   public_deps = [ "//llvm/utils/gn/build/libs/xml" ]
   sources = [
+    "common/Alarm.cpp",
     "common/File.cpp",
     "common/FileAction.cpp",
     "common/FileCache.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn
index 3d8e3e6..12d875c 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn
@@ -103,6 +103,7 @@ static_library("MCTargetDesc") {
     "AMDGPUInstPrinter.cpp",
     "AMDGPUMCAsmInfo.cpp",
     "AMDGPUMCCodeEmitter.cpp",
+    "AMDGPUMCExpr.cpp",
     "AMDGPUMCTargetDesc.cpp",
     "AMDGPUTargetStreamer.cpp",
     "R600InstPrinter.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
index be1efff..3e05b9b 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
@@ -24,6 +24,7 @@ unittest("OrcJITTests") {
     "JITTargetMachineBuilderTest.cpp",
     "LazyCallThroughAndReexportsTest.cpp",
     "LookupAndRecordAddrsTest.cpp",
+    "MachOPlatformTest.cpp",
     "MapperJITLinkMemoryManagerTest.cpp",
     "MemoryMapperTest.cpp",
     "ObjectFormatsTest.cpp",
diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h
index d823afb..b3d7a78 100644
--- a/mlir/include/mlir-c/Dialect/LLVM.h
+++ b/mlir/include/mlir-c/Dialect/LLVM.h
@@ -229,10 +229,10 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIBasicTypeAttrGet(
 
 /// Creates a LLVM DICompositeType attribute.
 MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompositeTypeAttrGet(
-    MlirContext ctx, unsigned int tag, MlirAttribute name, MlirAttribute file,
-    uint32_t line, MlirAttribute scope, MlirAttribute baseType, int64_t flags,
-    uint64_t sizeInBits, uint64_t alignInBits, intptr_t nElements,
-    MlirAttribute const *elements);
+    MlirContext ctx, unsigned int tag, MlirAttribute recId, MlirAttribute name,
+    MlirAttribute file, uint32_t line, MlirAttribute scope,
+    MlirAttribute baseType, int64_t flags, uint64_t sizeInBits,
+    uint64_t alignInBits, intptr_t nElements, MlirAttribute const *elements);
 
 /// Creates a LLVM DIDerivedType attribute.
 MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIDerivedTypeAttrGet(
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 809f034..a729bc9 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -148,7 +148,7 @@ std::unique_ptr<Pass> createBufferLoopHoistingPass();
 
 // Options struct for BufferResultsToOutParams pass.
 // Note: defined only here, not in tablegen.
-struct BufferResultsToOutParamsOptions {
+struct BufferResultsToOutParamsOpts {
   /// Memcpy function: Generate a memcpy between two memrefs.
   using MemCpyFn =
       std::function<LogicalResult(OpBuilder &, Location, Value, Value)>;
@@ -162,17 +162,21 @@ struct BufferResultsToOutParamsOptions {
   /// Memcpy function; used to create a copy between two memrefs.
   /// If this is empty, memref.copy is used.
   std::optional<MemCpyFn> memCpyFn;
+
+  /// If true, the pass adds a "bufferize.result" attribute to each output
+  /// parameter.
+  bool addResultAttribute = false;
 };
 
 /// Creates a pass that converts memref function results to out-params.
 std::unique_ptr<Pass> createBufferResultsToOutParamsPass(
-    const BufferResultsToOutParamsOptions &options = {});
+    const BufferResultsToOutParamsOpts &options = {});
 
 /// Replace buffers that are returned from a function with an out parameter.
 /// Also update all call sites.
 LogicalResult
 promoteBufferResultsToOutParams(ModuleOp module,
-                                const BufferResultsToOutParamsOptions &options);
+                                const BufferResultsToOutParamsOpts &options);
 
 /// Creates a pass that drops memref function results that are equivalent to a
 /// function argument.
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index e01f36b..1c3cdec 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -316,6 +316,11 @@ def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">
     buffers for results need to be allocated in the caller. This currently only
     works for static shaped memrefs.
   }];
+  let options = [
+    Option<"addResultAttribute", "add-result-attr", "bool",
+       /*default=*/"false",
+       "Add the attribute 'bufferize.result' to all output parameters.">,
+  ];
   let constructor = "mlir::bufferization::createBufferResultsToOutParamsPass()";
   let dependentDialects = ["memref::MemRefDialect"];
 }
diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
index ec842f7..78bfd56 100644
--- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
+++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
@@ -1155,4 +1155,36 @@ def EmitC_IfOp : EmitC_Op<"if",
   let hasCustomAssemblyFormat = 1;
 }
 
+def EmitC_SubscriptOp : EmitC_Op<"subscript",
+  [TypesMatchWith<"result type matches element type of 'array'",
+                  "array", "result",
+                  "::llvm::cast<ArrayType>($_self).getElementType()">]> {
+  let summary = "Array subscript operation";
+  let description = [{
+    With the `subscript` operation the subscript operator `[]` can be applied
+    to variables or arguments of array type.
+
+    Example:
+
+    ```mlir
+    %i = index.constant 1
+    %j = index.constant 7
+    %0 = emitc.subscript %arg0[%i, %j] : <4x8xf32>, index, index
+    ```
+  }];
+  let arguments = (ins Arg<EmitC_ArrayType, "the reference to load from">:$array,
+                       Variadic<IntegerIndexOrOpaqueType>:$indices);
+  let results = (outs AnyType:$result);
+
+  let builders = [
+    OpBuilder<(ins "Value":$array, "ValueRange":$indices), [{
+      build($_builder, $_state, cast<ArrayType>(array.getType()).getElementType(), array, indices);
+    }]>
+  ];
+
+  let hasVerifier = 1;
+  let assemblyFormat = "$array `[` $indices `]` attr-dict `:` type($array) `,` type($indices)";
+}
+
+
 #endif // MLIR_DIALECT_EMITC_IR_EMITC
diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td
index 1ff4102..a2ba45a 100644
--- a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td
+++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td
@@ -39,7 +39,7 @@ def EmitC_ArrayType : EmitC_Type<"Array", "array", [ShapedTypeInterface]> {
     // Array emitted as `int32_t[10]`
     !emitc.array<10xi32>
     // Array emitted as `float[10][20]`
-    !emitc.ptr<10x20xf32>
+    !emitc.array<10x20xf32>
     ```
   }];
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
index 862abf0..759de74 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -29,6 +29,8 @@ add_mlir_doc(LLVMIntrinsicOps LLVMIntrinsicOps Dialects/ -gen-op-doc)
 set(LLVM_TARGET_DEFINITIONS LLVMInterfaces.td)
 mlir_tablegen(LLVMInterfaces.h.inc -gen-op-interface-decls)
 mlir_tablegen(LLVMInterfaces.cpp.inc -gen-op-interface-defs)
+mlir_tablegen(LLVMAttrInterfaces.h.inc -gen-attr-interface-decls)
+mlir_tablegen(LLVMAttrInterfaces.cpp.inc -gen-attr-interface-defs)
 mlir_tablegen(LLVMTypeInterfaces.h.inc -gen-type-interface-decls)
 mlir_tablegen(LLVMTypeInterfaces.cpp.inc -gen-type-interface-defs)
 add_public_tablegen_target(MLIRLLVMInterfacesIncGen)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index a831b07..d4d56ae 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -10,6 +10,7 @@
 #define LLVMIR_ATTRDEFS
 
 include "mlir/Dialect/LLVMIR/LLVMDialect.td"
+include "mlir/Dialect/LLVMIR/LLVMInterfaces.td"
 include "mlir/IR/AttrTypeBase.td"
 include "mlir/IR/CommonAttrConstraints.td"
 
@@ -238,7 +239,7 @@ def LoopAnnotationAttr : LLVM_Attr<"LoopAnnotation", "loop_annotation"> {
 //===----------------------------------------------------------------------===//
 
 class LLVM_DIParameter<string summary, string default, string parseName,
-                       string printName = parseName>
+                       string errorCase, string printName = parseName>
     : AttrOrTypeParameter<"unsigned", "debug info " # summary> {
   let parser = [{ [&]() -> FailureOr<unsigned> {
     SMLoc tagLoc = $_parser.getCurrentLocation();
@@ -246,33 +247,35 @@ class LLVM_DIParameter<string summary, string default, string parseName,
     if ($_parser.parseKeyword(&name))
       return failure();
 
-    if (unsigned tag = llvm::dwarf::get}] # parseName # [{(name))
-      return tag;
-    return $_parser.emitError(tagLoc)
-      << "invalid debug info }] # summary # [{ name: " << name;
+    unsigned tag = llvm::dwarf::get}] # parseName # [{(name);
+    if (tag == }] # errorCase # [{)
+      return $_parser.emitError(tagLoc)
+        << "invalid debug info }] # summary # [{ name: " << name;
+    return tag;
   }() }];
   let printer = "$_printer << llvm::dwarf::" # printName # "String($_self)";
   let defaultValue = default;
 }
 
 def LLVM_DICallingConventionParameter : LLVM_DIParameter<
-  "calling convention", /*default=*/"0", "CallingConvention", "Convention"
+  "calling convention", /*default=*/"0", "CallingConvention", /*errorCase=*/"0",
+  "Convention"
 >;
 
 def LLVM_DIEncodingParameter : LLVM_DIParameter<
-  "encoding", /*default=*/"0", "AttributeEncoding"
+  "encoding", /*default=*/"0", "AttributeEncoding", /*errorCase=*/"0"
 >;
 
 def LLVM_DILanguageParameter : LLVM_DIParameter<
-  "language", /*default=*/"", "Language"
+  "language", /*default=*/"", "Language", /*errorCase=*/"0"
 >;
 
 def LLVM_DITagParameter : LLVM_DIParameter<
-  "tag", /*default=*/"", "Tag"
+  "tag", /*default=*/"", "Tag", /*errorCase=*/"llvm::dwarf::DW_TAG_invalid"
 >;
 
 def LLVM_DIOperationEncodingParameter : LLVM_DIParameter<
-  "operation encoding", /*default=*/"", "OperationEncoding"
+  "operation encoding", /*default=*/"", "OperationEncoding", /*errorCase=*/"0"
 >;
 
 //===----------------------------------------------------------------------===//
@@ -357,9 +360,11 @@ def LLVM_DICompileUnitAttr : LLVM_Attr<"DICompileUnit", "di_compile_unit",
 //===----------------------------------------------------------------------===//
 
 def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type",
-                                         /*traits=*/[], "DITypeAttr"> {
+                                         [LLVM_DIRecursiveTypeAttrInterface],
+                                         "DITypeAttr"> {
   let parameters = (ins
     LLVM_DITagParameter:$tag,
+    OptionalParameter<"DistinctAttr">:$recId,
     OptionalParameter<"StringAttr">:$name,
     OptionalParameter<"DIFileAttr">:$file,
     OptionalParameter<"uint32_t">:$line,
@@ -371,6 +376,21 @@ def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type",
     OptionalArrayRefParameter<"DINodeAttr">:$elements
   );
   let assemblyFormat = "`<` struct(params) `>`";
+  let extraClassDeclaration = [{
+    /// Requirements of DIRecursiveTypeAttrInterface.
+    /// @{
+
+    /// Get whether this attr describes a recursive self reference.
+    bool isRecSelf() { return getTag() == 0; }
+
+    /// Get a copy of this type attr but with the recursive ID set to `recId`.
+    DIRecursiveTypeAttrInterface withRecId(DistinctAttr recId);
+
+    /// Build a rec-self instance using the provided `recId`.
+    static DIRecursiveTypeAttrInterface getRecSelf(DistinctAttr recId);
+
+    /// @}
+  }];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h
index c370bfa..091a817 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h
@@ -84,6 +84,8 @@ using linkage::Linkage;
 } // namespace LLVM
 } // namespace mlir
 
+#include "mlir/Dialect/LLVMIR/LLVMAttrInterfaces.h.inc"
+
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/LLVMIR/LLVMOpsAttrDefs.h.inc"
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td
index 3b2a132..e7a1da8 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines op and type interfaces for the LLVM dialect in MLIR.
+// This file defines interfaces for the LLVM dialect in MLIR.
 //
 //===----------------------------------------------------------------------===//
 
@@ -319,4 +319,58 @@ def LLVM_PointerElementTypeInterface
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// LLVM dialect attr interfaces.
+//===----------------------------------------------------------------------===//
+
+def LLVM_DIRecursiveTypeAttrInterface
+  : AttrInterface<"DIRecursiveTypeAttrInterface"> {
+  let description = [{
+    This attribute represents a DITypeAttr that is recursive. Only DITypeAttrs
+    that translate to LLVM DITypes that support mutation should implement this
+    interface.
+
+    There are two modes for conforming attributes:
+
+    1. "rec-decl":
+      - This attr is a recursive declaration identified by a recId.
+
+    2. "rec-self":
+      - This attr is considered a recursive self reference.
+      - This attr itself is a placeholder type that should be conceptually
+        replaced with the closest parent attr of the same type with the same
+        recId.
+
+    For example, to represent a linked list struct:
+
+      #rec_self = di_composite_type<recId = 0>
+      #ptr = di_derived_type<baseType: #rec_self, ...>
+      #field = di_derived_type<name = "next", baseType: #ptr, ...>
+      #rec = di_composite_type<recId = 0, name = "Node", elements: #field, ...>
+      #var = di_local_variable<type = #rec, ...>
+
+    Note that a rec-self without an outer rec-decl with the same recId is
+    conceptually the same as an "unbound" variable. The context needs to provide
+    meaning to the rec-self.
+  }];
+  let cppNamespace = "::mlir::LLVM";
+  let methods = [
+    InterfaceMethod<[{
+      Get whether this attr describes a recursive self reference.
+    }], "bool", "isRecSelf", (ins)>,
+    InterfaceMethod<[{
+      Get the recursive ID used for matching "rec-decl" with "rec-self".
+      If this attr instance is not recursive, return a null attribute.
+    }], "DistinctAttr", "getRecId", (ins)>,
+    InterfaceMethod<[{
+      Get a copy of this type attr but with the recursive ID set to `recId`.
+    }], "DIRecursiveTypeAttrInterface", "withRecId",
+    (ins "DistinctAttr":$recId)>,
+    StaticInterfaceMethod<[{
+      Build a rec-self instance using the provided `recId`.
+    }], "DIRecursiveTypeAttrInterface", "getRecSelf",
+    (ins "DistinctAttr":$recId)>
+  ];
+}
+
 #endif // LLVMIR_INTERFACES
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index abb38a3..1dabf5d 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -162,10 +162,18 @@ def ROCDL_BallotOp :
   ROCDL_Op<"ballot">,
   Results<(outs LLVM_Type:$res)>,
   Arguments<(ins I1:$pred)> {
+  let summary = "Vote across thread group";
+
+  let description = [{
+      Ballot provides a bit mask containing the 1-bit predicate value from each lane. 
+      The nth bit of the result contains the 1 bit contributed by the nth warp lane.
+  }];
+
   string llvmBuilder = [{
       $res = createIntrinsicCall(builder,
-            llvm::Intrinsic::amdgcn_ballot, {$pred}, {llvm::Type::getInt32Ty(moduleTranslation.getLLVMContext())});
+            llvm::Intrinsic::amdgcn_ballot, {$pred}, {$_resultType});
   }];
+
   let assemblyFormat = "$pred attr-dict `:` type($res)";
 }
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index c64ecb7..feb3b3f 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -460,7 +460,8 @@ LogicalResult promoteSubviewsPrecondition(Operation *op,
 LogicalResult vectorizeOpPrecondition(Operation *op,
                                       ArrayRef<int64_t> inputVectorSizes = {},
                                       ArrayRef<bool> inputScalableVecDims = {},
-                                      bool vectorizeNDExtract = false);
+                                      bool vectorizeNDExtract = false,
+                                      bool flatten1DDepthwiseConv = false);
 
 //===----------------------------------------------------------------------===//
 // Transformations exposed as functional-style API calls.
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 0bd402d..dcf04ab 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -575,7 +575,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
     The optional `order` attribute specifies which order the iterations of the
     associate loops are executed in. Currently the only option for this
     attribute is "concurrent".
-    
+
     The optional `byref` attribute indicates that reduction arguments should be
     passed by reference.
   }];
@@ -1347,10 +1347,10 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
     - `var_type`: The type of the variable to copy.
     - `var_ptr_ptr`: Used when the variable copied is a member of a class, structure
       or derived type and refers to the originating struct.
-    - `members`:  Used to indicate mapped child members for the current MapInfoOp, 
-       represented as other MapInfoOp's, utilised in cases where a parent structure 
-       type and members of the structure type are being mapped at the same time. 
-       For example: map(to: parent, parent->member, parent->member2[:10])  
+    - `members`:  Used to indicate mapped child members for the current MapInfoOp,
+       represented as other MapInfoOp's, utilised in cases where a parent structure
+       type and members of the structure type are being mapped at the same time.
+       For example: map(to: parent, parent->member, parent->member2[:10])
     - `bounds`: Used when copying slices of array's, pointers or pointer members of
        objects (e.g. derived types or classes), indicates the bounds to be copied
        of the variable. When it's an array slice it is in rank order where rank 0
@@ -1391,7 +1391,7 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
 // 2.14.2 target data Construct
 //===---------------------------------------------------------------------===//
 
-def Target_DataOp: OpenMP_Op<"target_data", [AttrSizedOperandSegments, 
+def Target_DataOp: OpenMP_Op<"target_data", [AttrSizedOperandSegments,
                                              MapClauseOwningOpInterface]>{
   let summary = "target data construct";
   let description = [{
@@ -1449,7 +1449,7 @@ def Target_DataOp: OpenMP_Op<"target_data", [AttrSizedOperandSegments,
 //===---------------------------------------------------------------------===//
 
 def Target_EnterDataOp: OpenMP_Op<"target_enter_data",
-                                                 [AttrSizedOperandSegments, 
+                                                 [AttrSizedOperandSegments,
                                                   MapClauseOwningOpInterface]>{
   let  summary = "target enter data construct";
   let description = [{
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index ed086d3..2e37384 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -34,8 +34,8 @@ def OutlineableOpenMPOpInterface : OpInterface<"OutlineableOpenMPOpInterface"> {
 def MapClauseOwningOpInterface : OpInterface<"MapClauseOwningOpInterface"> {
   let description = [{
     OpenMP operations which own a list of omp::MapInfoOp's implement this interface
-    to allow generic access to deal with map operands to more easily manipulate 
-    this class of operations. 
+    to allow generic access to deal with map operands to more easily manipulate
+    this class of operations.
   }];
 
   let cppNamespace = "::mlir::omp";
@@ -45,7 +45,7 @@ def MapClauseOwningOpInterface : OpInterface<"MapClauseOwningOpInterface"> {
       (ins), [{
         return $_op.getMapOperands();
       }]>,
-      InterfaceMethod<"Get mutable map operands", "::mlir::MutableOperandRange", 
+      InterfaceMethod<"Get mutable map operands", "::mlir::MutableOperandRange",
                       "getMapOperandsMutable",
       (ins), [{
         return $_op.getMapOperandsMutable();
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index 0498576..1b2981a 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -1419,7 +1419,7 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
 }
 
 //===----------------------------------------------------------------------===//
-// Sparse Tensor Debugging Operations.
+// Sparse Tensor Debugging and Test-Only Operations.
 //===----------------------------------------------------------------------===//
 
 def SparseTensor_PrintOp : SparseTensor_Op<"print">,
@@ -1440,4 +1440,15 @@ def SparseTensor_PrintOp : SparseTensor_Op<"print">,
   let assemblyFormat = "$tensor attr-dict `:` type($tensor)";
 }
 
+def SparseTensor_HasRuntimeLibraryOp
+    : SparseTensor_Op<"has_runtime_library", []>, Results<(outs I1:$result)> {
+  string summary = "Indicates whether running in runtime/codegen mode";
+  string description = [{
+    Returns a boolean value that indicates whether the sparse compiler runs in
+    runtime library mode or not. For testing only: This op is useful for writing
+    test cases that require different IR depending on runtime/codegen mode.
+  }];
+  let assemblyFormat = "attr-dict";
+}
+
 #endif // SPARSETENSOR_OPS
diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
index f6b03a0..3ce16ef 100644
--- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
+++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
@@ -9,12 +9,15 @@
 #ifndef MLIR_DIALECT_VECTOR_UTILS_VECTORUTILS_H_
 #define MLIR_DIALECT_VECTOR_UTILS_VECTORUTILS_H_
 
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/Support/LLVM.h"
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/TypeSwitch.h"
 
 namespace mlir {
 
@@ -98,6 +101,17 @@ bool isContiguousSlice(MemRefType memrefType, VectorType vectorType);
 std::optional<StaticTileOffsetRange>
 createUnrollIterator(VectorType vType, int64_t targetRank = 1);
 
+/// A wrapper for getMixedSizes for vector.transfer_read and
+/// vector.transfer_write Ops (for source and destination, respectively).
+///
+/// Tensor and MemRef types implement their own, very similar version of
+/// getMixedSizes. This method will call the appropriate version (depending on
+/// `hasTensorSemantics`). It will also automatically extract the operand for
+/// which to call it on (source for "read" and destination for "write" ops).
+SmallVector<OpFoldResult> getMixedSizesXfer(bool hasTensorSemantics,
+                                            Operation *xfer,
+                                            RewriterBase &rewriter);
+
 } // namespace vector
 
 /// Constructs a permutation map of invariant memref indices to vector
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
index f927b82..714e664 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -206,7 +206,7 @@ public:
     auto *lvlCOO = readCOO<V>(map, lvlSizes);
     auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO(
         dimRank, getDimSizes(), lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,
-        *lvlCOO);
+        lvlCOO);
     delete lvlCOO;
     return tensor;
   }
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 468782e..773957a 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -197,37 +197,22 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
                       const uint64_t *lvl2dim)
       : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                                 dim2lvl, lvl2dim),
-        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank), coo() {}
+        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}
 
 public:
   /// Constructs a sparse tensor with the given encoding, and allocates
-  /// overhead storage according to some simple heuristics. When the
-  /// `bool` argument is true and `lvlTypes` are all dense, then this
-  /// ctor will also initialize the values array with zeros. That
-  /// argument should be true when an empty tensor is intended; whereas
-  /// it should usually be false when the ctor will be followed up by
-  /// some other form of initialization.
+  /// overhead storage according to some simple heuristics. When lvlCOO
+  /// is set, the sparse tensor initializes with the contents from that
+  /// data structure. Otherwise, an empty sparse tensor results.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
                       const LevelType *lvlTypes, const uint64_t *dim2lvl,
-                      const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO,
-                      bool initializeValuesIfAllDense);
+                      const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO);
 
   /// Constructs a sparse tensor with the given encoding, and initializes
-  /// the contents from the COO. This ctor performs the same heuristic
-  /// overhead-storage allocation as the ctor above.
-  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
-                      uint64_t lvlRank, const uint64_t *lvlSizes,
-                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
-                      const uint64_t *lvl2dim, SparseTensorCOO<V> &lvlCOO);
-
-  /// Constructs a sparse tensor with the given encoding, and initializes
-  /// the contents from the level buffers. This ctor allocates exactly
-  /// the required amount of overhead storage, not using any heuristics.
-  /// It assumes that the data provided by `lvlBufs` can be directly used to
-  /// interpret the result sparse tensor and performs *NO* integrity test on the
-  /// input data. It also assume that the trailing COO coordinate buffer is
-  /// passed in as a single AoS memory.
+  /// the contents from the level buffers. The constructor assumes that the
+  /// data provided by `lvlBufs` can be directly used to interpret the result
+  /// sparse tensor and performs no integrity test on the input data.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
                       const LevelType *lvlTypes, const uint64_t *dim2lvl,
@@ -244,16 +229,14 @@ public:
   newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
              const uint64_t *lvlSizes, const LevelType *lvlTypes,
              const uint64_t *dim2lvl, const uint64_t *lvl2dim,
-             SparseTensorCOO<V> &lvlCOO);
+             SparseTensorCOO<V> *lvlCOO);
 
-  /// Allocates a new sparse tensor and initialize it with the data stored level
-  /// buffers directly.
+  /// Allocates a new sparse tensor and initialize it from the given buffers.
   static SparseTensorStorage<P, C, V> *
-  packFromLvlBuffers(uint64_t dimRank, const uint64_t *dimSizes,
-                     uint64_t lvlRank, const uint64_t *lvlSizes,
-                     const LevelType *lvlTypes, const uint64_t *dim2lvl,
-                     const uint64_t *lvl2dim, uint64_t srcRank,
-                     const intptr_t *buffers);
+  newFromBuffers(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
+                 const uint64_t *lvlSizes, const LevelType *lvlTypes,
+                 const uint64_t *dim2lvl, const uint64_t *lvl2dim,
+                 uint64_t srcRank, const intptr_t *buffers);
 
   ~SparseTensorStorage() final = default;
 
@@ -337,16 +320,6 @@ public:
     }
   }
 
-  /// Allocates a new COO object and initializes it with the contents.
-  /// Callers must make sure to delete the COO when they're done with it.
-  SparseTensorCOO<V> *toCOO() {
-    std::vector<uint64_t> dimCoords(getDimRank());
-    coo = new SparseTensorCOO<V>(getDimSizes(), values.size());
-    toCOO(0, 0, dimCoords);
-    assert(coo->getElements().size() == values.size());
-    return coo;
-  }
-
   /// Sort the unordered tensor in place, the method assumes that it is
   /// an unordered COO tensor.
   void sortInPlace() {
@@ -556,58 +529,10 @@ private:
     return -1u;
   }
 
-  // Performs forall on level entries and inserts into dim COO.
-  void toCOO(uint64_t parentPos, uint64_t l, std::vector<uint64_t> &dimCoords) {
-    if (l == getLvlRank()) {
-      map.pushbackward(lvlCursor.data(), dimCoords.data());
-      assert(coo);
-      assert(parentPos < values.size());
-      coo->add(dimCoords, values[parentPos]);
-      return;
-    }
-    if (isCompressedLvl(l)) {
-      const std::vector<P> &positionsL = positions[l];
-      assert(parentPos + 1 < positionsL.size());
-      const uint64_t pstart = static_cast<uint64_t>(positionsL[parentPos]);
-      const uint64_t pstop = static_cast<uint64_t>(positionsL[parentPos + 1]);
-      const std::vector<C> &coordinatesL = coordinates[l];
-      assert(pstop <= coordinatesL.size());
-      for (uint64_t pos = pstart; pos < pstop; pos++) {
-        lvlCursor[l] = static_cast<uint64_t>(coordinatesL[pos]);
-        toCOO(pos, l + 1, dimCoords);
-      }
-    } else if (isLooseCompressedLvl(l)) {
-      const std::vector<P> &positionsL = positions[l];
-      assert(2 * parentPos + 1 < positionsL.size());
-      const uint64_t pstart = static_cast<uint64_t>(positionsL[2 * parentPos]);
-      const uint64_t pstop =
-          static_cast<uint64_t>(positionsL[2 * parentPos + 1]);
-      const std::vector<C> &coordinatesL = coordinates[l];
-      assert(pstop <= coordinatesL.size());
-      for (uint64_t pos = pstart; pos < pstop; pos++) {
-        lvlCursor[l] = static_cast<uint64_t>(coordinatesL[pos]);
-        toCOO(pos, l + 1, dimCoords);
-      }
-    } else if (isSingletonLvl(l) || isNOutOfMLvl(l)) {
-      assert(parentPos < coordinates[l].size());
-      lvlCursor[l] = static_cast<uint64_t>(coordinates[l][parentPos]);
-      toCOO(parentPos, l + 1, dimCoords);
-    } else { // Dense level.
-      assert(isDenseLvl(l));
-      const uint64_t sz = getLvlSizes()[l];
-      const uint64_t pstart = parentPos * sz;
-      for (uint64_t c = 0; c < sz; c++) {
-        lvlCursor[l] = c;
-        toCOO(pstart + c, l + 1, dimCoords);
-      }
-    }
-  }
-
   std::vector<std::vector<P>> positions;
   std::vector<std::vector<C>> coordinates;
   std::vector<V> values;
   std::vector<uint64_t> lvlCursor;
-  SparseTensorCOO<V> *coo;
 };
 
 //===----------------------------------------------------------------------===//
@@ -621,9 +546,9 @@ SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
     const uint64_t *lvlSizes, const LevelType *lvlTypes,
     const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
+  SparseTensorCOO<V> *noLvlCOO = nullptr;
   return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
-                                          lvlTypes, dim2lvl, lvl2dim, nullptr,
-                                          true);
+                                          lvlTypes, dim2lvl, lvl2dim, noLvlCOO);
 }
 
 template <typename P, typename C, typename V>
@@ -631,13 +556,14 @@ SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
     const uint64_t *lvlSizes, const LevelType *lvlTypes,
     const uint64_t *dim2lvl, const uint64_t *lvl2dim,
-    SparseTensorCOO<V> &lvlCOO) {
+    SparseTensorCOO<V> *lvlCOO) {
+  assert(lvlCOO);
   return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                           lvlTypes, dim2lvl, lvl2dim, lvlCOO);
 }
 
 template <typename P, typename C, typename V>
-SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::packFromLvlBuffers(
+SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromBuffers(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
     const uint64_t *lvlSizes, const LevelType *lvlTypes,
     const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank,
@@ -657,11 +583,9 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
     const uint64_t *lvlSizes, const LevelType *lvlTypes,
     const uint64_t *dim2lvl, const uint64_t *lvl2dim,
-    SparseTensorCOO<V> *lvlCOO, bool initializeValuesIfAllDense)
+    SparseTensorCOO<V> *lvlCOO)
     : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                           dim2lvl, lvl2dim) {
-  assert(!lvlCOO || lvlRank == lvlCOO->getRank());
-  coo = lvlCOO;
   // Provide hints on capacity of positions and coordinates.
   // TODO: needs much fine-tuning based on actual sparsity; currently
   // we reserve position/coordinate space based on all previous dense
@@ -692,27 +616,20 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
       sz = detail::checkedMul(sz, lvlSizes[l]);
     }
   }
-  if (allDense && initializeValuesIfAllDense)
+  if (lvlCOO) {
+    /* New from COO: ensure it is sorted. */
+    assert(lvlCOO->getRank() == lvlRank);
+    lvlCOO->sort();
+    // Now actually insert the `elements`.
+    const auto &elements = lvlCOO->getElements();
+    const uint64_t nse = elements.size();
+    assert(values.size() == 0);
+    values.reserve(nse);
+    fromCOO(elements, 0, nse, 0);
+  } else if (allDense) {
+    /* New empty (all dense) */
     values.resize(sz, 0);
-}
-
-template <typename P, typename C, typename V>
-SparseTensorStorage<P, C, V>::SparseTensorStorage( // NOLINT
-    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
-    const uint64_t *lvlSizes, const LevelType *lvlTypes,
-    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
-    SparseTensorCOO<V> &lvlCOO)
-    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
-                          dim2lvl, lvl2dim, nullptr, false) {
-  // Ensure lvlCOO is sorted.
-  assert(lvlRank == lvlCOO.getRank());
-  lvlCOO.sort();
-  // Now actually insert the `elements`.
-  const auto &elements = lvlCOO.getElements();
-  const uint64_t nse = elements.size();
-  assert(values.size() == 0);
-  values.reserve(nse);
-  fromCOO(elements, 0, nse, 0);
+  }
 }
 
 template <typename P, typename C, typename V>
diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index d2c8940..511cb11 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -15,6 +15,8 @@
 
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
 #include <memory>
 
 namespace llvm {
@@ -27,20 +29,24 @@ struct LogicalResult;
 using ChunkBufferHandler = function_ref<LogicalResult(
     std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
 
-/// Splits the specified buffer on a marker (`// -----`), processes each chunk
-/// independently according to the normal `processChunkBuffer` logic, and writes
-/// all results to `os`.
+extern inline const char *const kDefaultSplitMarker = "// -----";
+
+/// Splits the specified buffer on a marker (`// -----` by default), processes
+/// each chunk independently according to the normal `processChunkBuffer` logic,
+/// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
-/// into a single file. `enableSplitting` can be used to toggle if splitting
-/// should be enabled, e.g. to allow for merging split and non-split code paths.
-/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
-/// between each of the processed output chunks.
+/// into a single file. The input split marker is configurable. If it is empty,
+/// merging is disabled, which allows for merging split and non-split code
+/// paths. Output split markers (`//-----` by default) followed by a new line
+/// character, respectively, are placed between each of the processed output
+/// chunks. (The new line character is inserted even if the split marker is
+/// empty.)
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,
-                      bool enableSplitting = true,
-                      bool insertMarkerInOutput = false);
+                      llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
+                      llvm::StringRef outputSplitMarker = "");
 } // namespace mlir
 
 #endif // MLIR_SUPPORT_TOOLUTILITIES_H
diff --git a/mlir/include/mlir/Target/Cpp/CppEmitter.h b/mlir/include/mlir/Target/Cpp/CppEmitter.h
index 30d3fff..d3c0810 100644
--- a/mlir/include/mlir/Target/Cpp/CppEmitter.h
+++ b/mlir/include/mlir/Target/Cpp/CppEmitter.h
@@ -13,13 +13,11 @@
 #ifndef MLIR_TARGET_CPP_CPPEMITTER_H
 #define MLIR_TARGET_CPP_CPPEMITTER_H
 
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Value.h"
-#include "llvm/ADT/ScopedHashTable.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stack>
+#include "mlir/Support/LLVM.h"
 
 namespace mlir {
+struct LogicalResult;
+class Operation;
 namespace emitc {
 
 /// Translates the given operation to C++ code. The operation or operations in
diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
index 6e90fad..8adc809 100644
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@@ -15,6 +15,7 @@
 
 #include "mlir/Debug/CLOptionsSetup.h"
 #include "mlir/Support/LogicalResult.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "llvm/ADT/StringRef.h"
 
 #include <cstdlib>
@@ -136,13 +137,24 @@ public:
   }
   bool shouldShowDialects() const { return showDialectsFlag; }
 
-  /// Set whether to split the input file based on the `// -----` marker into
-  /// pieces and process each chunk independently.
-  MlirOptMainConfig &splitInputFile(bool split = true) {
-    splitInputFileFlag = split;
+  /// Set the marker on which to split the input into chunks and process each
+  /// chunk independently. Input is not split if empty.
+  MlirOptMainConfig &
+  splitInputFile(std::string splitMarker = kDefaultSplitMarker) {
+    splitInputFileFlag = std::move(splitMarker);
+    return *this;
+  }
+  bool shouldSplitInputFile() const { return splitInputFileFlag.empty(); }
+  StringRef inputSplitMarker() const { return splitInputFileFlag; }
+
+  /// Set whether to merge the output chunks into one file using the given
+  /// marker.
+  MlirOptMainConfig &
+  outputSplitMarker(std::string splitMarker = kDefaultSplitMarker) {
+    outputSplitMarkerFlag = std::move(splitMarker);
     return *this;
   }
-  bool shouldSplitInputFile() const { return splitInputFileFlag; }
+  StringRef outputSplitMarker() const { return outputSplitMarkerFlag; }
 
   /// Disable implicit addition of a top-level module op during parsing.
   MlirOptMainConfig &useExplicitModule(bool useExplicitModule) {
@@ -215,9 +227,12 @@ protected:
   /// Show the registered dialects before trying to load the input file.
   bool showDialectsFlag = false;
 
-  /// Split the input file based on the `// -----` marker into pieces and
-  /// process each chunk independently.
-  bool splitInputFileFlag = false;
+  /// Split the input file based on the given marker into chunks and process
+  /// each chunk independently. Input is not split if empty.
+  std::string splitInputFileFlag = "";
+
+  /// Merge output chunks into one file using the given marker.
+  std::string outputSplitMarkerFlag = "";
 
   /// Use an explicit top-level module op during parsing.
   bool useExplicitModuleFlag = false;
diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp
index 2d938ce..d0fd5ce 100644
--- a/mlir/lib/CAPI/Dialect/LLVM.cpp
+++ b/mlir/lib/CAPI/Dialect/LLVM.cpp
@@ -152,18 +152,18 @@ MlirAttribute mlirLLVMDIBasicTypeAttrGet(MlirContext ctx, unsigned int tag,
 }
 
 MlirAttribute mlirLLVMDICompositeTypeAttrGet(
-    MlirContext ctx, unsigned int tag, MlirAttribute name, MlirAttribute file,
-    uint32_t line, MlirAttribute scope, MlirAttribute baseType, int64_t flags,
-    uint64_t sizeInBits, uint64_t alignInBits, intptr_t nElements,
-    MlirAttribute const *elements) {
+    MlirContext ctx, unsigned int tag, MlirAttribute recId, MlirAttribute name,
+    MlirAttribute file, uint32_t line, MlirAttribute scope,
+    MlirAttribute baseType, int64_t flags, uint64_t sizeInBits,
+    uint64_t alignInBits, intptr_t nElements, MlirAttribute const *elements) {
   SmallVector<Attribute> elementsStorage;
   elementsStorage.reserve(nElements);
 
   return wrap(DICompositeTypeAttr::get(
-      unwrap(ctx), tag, cast<StringAttr>(unwrap(name)),
-      cast<DIFileAttr>(unwrap(file)), line, cast<DIScopeAttr>(unwrap(scope)),
-      cast<DITypeAttr>(unwrap(baseType)), DIFlags(flags), sizeInBits,
-      alignInBits,
+      unwrap(ctx), tag, cast<DistinctAttr>(unwrap(recId)),
+      cast<StringAttr>(unwrap(name)), cast<DIFileAttr>(unwrap(file)), line,
+      cast<DIScopeAttr>(unwrap(scope)), cast<DITypeAttr>(unwrap(baseType)),
+      DIFlags(flags), sizeInBits, alignInBits,
       llvm::map_to_vector(unwrapList(nElements, elements, elementsStorage),
                           [](Attribute a) { return a.cast<DINodeAttr>(); })));
 }
diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
index 12a5e20..7672927 100644
--- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
+++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
@@ -845,6 +845,7 @@ struct SqrtOpConversion : public OpConversionPattern<complex::SqrtOp> {
     auto type = cast<ComplexType>(op.getType());
     Type elementType = type.getElementType();
     Value arg = adaptor.getComplex();
+    arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
 
     Value zero =
         b.create<arith::ConstantOp>(elementType, b.getZeroAttr(elementType));
@@ -852,14 +853,14 @@ struct SqrtOpConversion : public OpConversionPattern<complex::SqrtOp> {
     Value real = b.create<complex::ReOp>(elementType, adaptor.getComplex());
     Value imag = b.create<complex::ImOp>(elementType, adaptor.getComplex());
 
-    Value absLhs = b.create<math::AbsFOp>(real);
-    Value absArg = b.create<complex::AbsOp>(elementType, arg);
-    Value addAbs = b.create<arith::AddFOp>(absLhs, absArg);
+    Value absLhs = b.create<math::AbsFOp>(real, fmf);
+    Value absArg = b.create<complex::AbsOp>(elementType, arg, fmf);
+    Value addAbs = b.create<arith::AddFOp>(absLhs, absArg, fmf);
 
     Value half = b.create<arith::ConstantOp>(elementType,
                                              b.getFloatAttr(elementType, 0.5));
-    Value halfAddAbs = b.create<arith::MulFOp>(addAbs, half);
-    Value sqrtAddAbs = b.create<math::SqrtOp>(halfAddAbs);
+    Value halfAddAbs = b.create<arith::MulFOp>(addAbs, half, fmf);
+    Value sqrtAddAbs = b.create<math::SqrtOp>(halfAddAbs, fmf);
 
     Value realIsNegative =
         b.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, real, zero);
@@ -869,7 +870,7 @@ struct SqrtOpConversion : public OpConversionPattern<complex::SqrtOp> {
     Value resultReal = sqrtAddAbs;
 
     Value imagDivTwoResultReal = b.create<arith::DivFOp>(
-        imag, b.create<arith::AddFOp>(resultReal, resultReal));
+        imag, b.create<arith::AddFOp>(resultReal, resultReal, fmf), fmf);
 
     Value negativeResultReal = b.create<arith::NegFOp>(resultReal);
 
@@ -882,7 +883,7 @@ struct SqrtOpConversion : public OpConversionPattern<complex::SqrtOp> {
     resultReal = b.create<arith::SelectOp>(
         realIsNegative,
         b.create<arith::DivFOp>(
-            imag, b.create<arith::AddFOp>(resultImag, resultImag)),
+            imag, b.create<arith::AddFOp>(resultImag, resultImag, fmf), fmf),
         resultReal);
 
     Value realIsZero =
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index 930f035..a2222e1 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -21,7 +21,7 @@ namespace bufferization {
 } // namespace mlir
 
 using namespace mlir;
-using MemCpyFn = bufferization::BufferResultsToOutParamsOptions::MemCpyFn;
+using MemCpyFn = bufferization::BufferResultsToOutParamsOpts::MemCpyFn;
 
 /// Return `true` if the given MemRef type has a fully dynamic layout.
 static bool hasFullyDynamicLayoutMap(MemRefType type) {
@@ -45,9 +45,12 @@ static bool hasStaticIdentityLayout(MemRefType type) {
 // Updates the func op and entry block.
 //
 // Any args appended to the entry block are added to `appendedEntryArgs`.
+// If `addResultAttribute` is true, adds the unit attribute `bufferize.result`
+// to each newly created function argument.
 static LogicalResult
 updateFuncOp(func::FuncOp func,
-             SmallVectorImpl<BlockArgument> &appendedEntryArgs) {
+             SmallVectorImpl<BlockArgument> &appendedEntryArgs,
+             bool addResultAttribute) {
   auto functionType = func.getFunctionType();
 
   // Collect information about the results will become appended arguments.
@@ -80,6 +83,10 @@ updateFuncOp(func::FuncOp func,
   for (int i = 0, e = erasedResultTypes.size(); i < e; ++i, ++erasedIndicesIt) {
     func.setArgAttrs(functionType.getNumInputs() + i,
                      func.getResultAttrs(*erasedIndicesIt));
+    if (addResultAttribute)
+      func.setArgAttr(functionType.getNumInputs() + i,
+                      StringAttr::get(func.getContext(), "bufferize.result"),
+                      UnitAttr::get(func.getContext()));
   }
 
   // Erase the results.
@@ -127,7 +134,7 @@ static LogicalResult updateReturnOps(func::FuncOp func,
 // temporary buffers for newly introduced out params.
 static LogicalResult
 updateCalls(ModuleOp module,
-            const bufferization::BufferResultsToOutParamsOptions &options) {
+            const bufferization::BufferResultsToOutParamsOpts &options) {
   bool didFail = false;
   SymbolTable symtab(module);
   module.walk([&](func::CallOp op) {
@@ -189,12 +196,13 @@ updateCalls(ModuleOp module,
 
 LogicalResult mlir::bufferization::promoteBufferResultsToOutParams(
     ModuleOp module,
-    const bufferization::BufferResultsToOutParamsOptions &options) {
+    const bufferization::BufferResultsToOutParamsOpts &options) {
   for (auto func : module.getOps<func::FuncOp>()) {
     if (!options.filterFn(&func))
       continue;
     SmallVector<BlockArgument, 6> appendedEntryArgs;
-    if (failed(updateFuncOp(func, appendedEntryArgs)))
+    if (failed(
+            updateFuncOp(func, appendedEntryArgs, options.addResultAttribute)))
       return failure();
     if (func.isExternal())
       continue;
@@ -218,21 +226,25 @@ struct BufferResultsToOutParamsPass
     : bufferization::impl::BufferResultsToOutParamsBase<
           BufferResultsToOutParamsPass> {
   explicit BufferResultsToOutParamsPass(
-      const bufferization::BufferResultsToOutParamsOptions &options)
+      const bufferization::BufferResultsToOutParamsOpts &options)
       : options(options) {}
 
   void runOnOperation() override {
+    // Convert from pass options in tablegen to BufferResultsToOutParamsOpts.
+    if (addResultAttribute)
+      options.addResultAttribute = true;
+
     if (failed(bufferization::promoteBufferResultsToOutParams(getOperation(),
                                                               options)))
       return signalPassFailure();
   }
 
 private:
-  bufferization::BufferResultsToOutParamsOptions options;
+  bufferization::BufferResultsToOutParamsOpts options;
 };
 } // namespace
 
 std::unique_ptr<Pass> mlir::bufferization::createBufferResultsToOutParamsPass(
-    const bufferization::BufferResultsToOutParamsOptions &options) {
+    const bufferization::BufferResultsToOutParamsOpts &options) {
   return std::make_unique<BufferResultsToOutParamsPass>(options);
 }
diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
index 9426bbb..e401a83 100644
--- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
+++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
@@ -132,9 +132,10 @@ LogicalResult ApplyOp::verify() {
 LogicalResult emitc::AssignOp::verify() {
   Value variable = getVar();
   Operation *variableDef = variable.getDefiningOp();
-  if (!variableDef || !llvm::isa<emitc::VariableOp>(variableDef))
+  if (!variableDef ||
+      !llvm::isa<emitc::VariableOp, emitc::SubscriptOp>(variableDef))
     return emitOpError() << "requires first operand (" << variable
-                         << ") to be a Variable";
+                         << ") to be a Variable or subscript";
 
   Value value = getValue();
   if (variable.getType() != value.getType())
@@ -747,6 +748,20 @@ LogicalResult emitc::YieldOp::verify() {
 }
 
 //===----------------------------------------------------------------------===//
+// SubscriptOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult emitc::SubscriptOp::verify() {
+  if (getIndices().size() != (size_t)getArray().getType().getRank()) {
+    return emitOpError() << "requires number of indices ("
+                         << getIndices().size()
+                         << ") to match the rank of the array type ("
+                         << getArray().getType().getRank() << ")";
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp
index 645a45d..a44e833 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp
@@ -35,6 +35,7 @@ static LogicalResult parseExpressionArg(AsmParser &parser, uint64_t opcode,
 static void printExpressionArg(AsmPrinter &printer, uint64_t opcode,
                                ArrayRef<uint64_t> args);
 
+#include "mlir/Dialect/LLVMIR/LLVMAttrInterfaces.cpp.inc"
 #include "mlir/Dialect/LLVMIR/LLVMOpsEnums.cpp.inc"
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/LLVMIR/LLVMOpsAttrDefs.cpp.inc"
@@ -186,6 +187,24 @@ void printExpressionArg(AsmPrinter &printer, uint64_t opcode,
 }
 
 //===----------------------------------------------------------------------===//
+// DICompositeTypeAttr
+//===----------------------------------------------------------------------===//
+
+DIRecursiveTypeAttrInterface
+DICompositeTypeAttr::withRecId(DistinctAttr recId) {
+  return DICompositeTypeAttr::get(getContext(), getTag(), recId, getName(),
+                                  getFile(), getLine(), getScope(),
+                                  getBaseType(), getFlags(), getSizeInBits(),
+                                  getAlignInBits(), getElements());
+}
+
+DIRecursiveTypeAttrInterface
+DICompositeTypeAttr::getRecSelf(DistinctAttr recId) {
+  return DICompositeTypeAttr::get(recId.getContext(), 0, recId, {}, {}, 0, {},
+                                  {}, DIFlags(), 0, 0, {});
+}
+
+//===----------------------------------------------------------------------===//
 // TargetFeaturesAttr
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 1e703da..c74ab1e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -24,6 +24,7 @@
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypeInterfaces.h"
@@ -54,6 +55,8 @@ using namespace mlir::linalg;
 /// Try to vectorize `convOp` as a convolution.
 static FailureOr<Operation *>
 vectorizeConvolution(RewriterBase &rewriter, LinalgOp convOp,
+                     ArrayRef<int64_t> inputVecSizes = {},
+                     ArrayRef<bool> inputVecScalableFlags = {},
                      bool flatten1DDepthwiseConv = false);
 
 /// Return the unique instance of OpType in `block` if it is indeed unique.
@@ -1712,7 +1715,40 @@ static LogicalResult reductionPreconditions(LinalgOp op) {
   return success();
 }
 
-static LogicalResult vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op) {
+static LogicalResult
+vectorizeDynamicConvOpPrecondition(linalg::LinalgOp conv,
+                                   bool flatten1DDepthwiseConv) {
+  if (flatten1DDepthwiseConv) {
+    LDBG("Vectorization of flattened convs with dynamic shapes is not "
+         "supported\n");
+    return failure();
+  }
+
+  if (!isa<linalg::DepthwiseConv1DNwcWcOp>(conv)) {
+    LDBG("Not a 1D depth-wise WC conv, dynamic shapes are not supported\n");
+    return failure();
+  }
+
+  // Support dynamic shapes in 1D depthwise convolution, but only in the
+  // _channel_ dimension.
+  Value lhs = conv.getDpsInputOperand(0)->get();
+  ArrayRef<int64_t> lhsShape = cast<ShapedType>(lhs.getType()).getShape();
+  auto shapeWithoutCh = lhsShape.drop_back(1);
+  if (ShapedType::isDynamicShape(shapeWithoutCh)) {
+    LDBG("Dynamically-shaped op vectorization precondition failed: only "
+         "channel dim can be dynamic\n");
+    return failure();
+  }
+
+  return success();
+}
+
+static LogicalResult
+vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op,
+                                     bool flatten1DDepthwiseConv) {
+  if (isa<ConvolutionOpInterface>(op.getOperation()))
+    return vectorizeDynamicConvOpPrecondition(op, flatten1DDepthwiseConv);
+
   // TODO: Masking only supports dynamic element-wise ops, linalg.generic ops,
   // linalg.copy ops and ops that implement ContractionOpInterface for now.
   if (!isElementwise(op) &&
@@ -1778,10 +1814,9 @@ vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp,
   return success();
 }
 
-static LogicalResult
-vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
-                              ArrayRef<int64_t> inputVectorSizes,
-                              bool vectorizeNDExtract) {
+static LogicalResult vectorizeLinalgOpPrecondition(
+    LinalgOp linalgOp, ArrayRef<int64_t> inputVectorSizes,
+    bool vectorizeNDExtract, bool flatten1DDepthwiseConv) {
   // tensor with dimension of 0 cannot be vectorized.
   if (llvm::is_contained(linalgOp.getStaticShape(), 0))
     return failure();
@@ -1791,8 +1826,8 @@ vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
                                       inputVectorSizes)))
     return failure();
 
-  if (linalgOp.hasDynamicShape() &&
-      failed(vectorizeDynamicLinalgOpPrecondition(linalgOp))) {
+  if (linalgOp.hasDynamicShape() && failed(vectorizeDynamicLinalgOpPrecondition(
+                                        linalgOp, flatten1DDepthwiseConv))) {
     LDBG("Dynamically-shaped op failed vectorization pre-conditions\n");
     return failure();
   }
@@ -1911,14 +1946,17 @@ vectorizeScalableVectorPrecondition(Operation *op,
   if (!isScalable)
     return success();
 
-  // Only element-wise ops supported in the presence of scalable dims.
+  // Only element-wise and 1d depthwise conv ops supported in the presence of
+  // scalable dims.
   auto linalgOp = dyn_cast<LinalgOp>(op);
-  return success(linalgOp && isElementwise(linalgOp));
+  return success(linalgOp && (isElementwise(linalgOp) ||
+                              isa<linalg::DepthwiseConv1DNwcWcOp>(op)));
 }
 
 LogicalResult mlir::linalg::vectorizeOpPrecondition(
     Operation *op, ArrayRef<int64_t> inputVectorSizes,
-    ArrayRef<bool> inputScalableVecDims, bool vectorizeNDExtract) {
+    ArrayRef<bool> inputScalableVecDims, bool vectorizeNDExtract,
+    bool flatten1DDepthwiseConv) {
   if (failed(vectorizeScalableVectorPrecondition(op, inputVectorSizes,
                                                  inputScalableVecDims)))
     return failure();
@@ -1926,7 +1964,8 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition(
   return TypeSwitch<Operation *, LogicalResult>(op)
       .Case<linalg::LinalgOp>([&](auto linalgOp) {
         return vectorizeLinalgOpPrecondition(linalgOp, inputVectorSizes,
-                                             vectorizeNDExtract);
+                                             vectorizeNDExtract,
+                                             flatten1DDepthwiseConv);
       })
       .Case<tensor::PadOp>([&](auto padOp) {
         return vectorizePadOpPrecondition(padOp, inputVectorSizes);
@@ -1975,7 +2014,8 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
   LLVM_DEBUG(llvm::dbgs() << "\n");
 
   if (failed(vectorizeOpPrecondition(op, inputVectorSizes, inputScalableVecDims,
-                                     vectorizeNDExtract))) {
+                                     vectorizeNDExtract,
+                                     flatten1DDepthwiseConv))) {
     LDBG("Vectorization pre-conditions failed\n");
     return failure();
   }
@@ -1999,7 +2039,8 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
             // inference.
             if (isa<ConvolutionOpInterface>(linalgOp.getOperation())) {
               FailureOr<Operation *> convOr = vectorizeConvolution(
-                  rewriter, linalgOp, flatten1DDepthwiseConv);
+                  rewriter, linalgOp, inputVectorSizes, inputScalableVecDims,
+                  flatten1DDepthwiseConv);
               if (succeeded(convOr)) {
                 llvm::append_range(results, (*convOr)->getResults());
                 return success();
@@ -3131,13 +3172,30 @@ struct Conv1DGenerator
   /// kw is always unrolled.
   /// TODO: w (resp. kw) is unrolled when the strideW ( resp. dilationW) is
   /// > 1.
-  FailureOr<Operation *> depthwiseConv(bool flatten) {
+  FailureOr<Operation *> depthwiseConv(uint64_t channelDimVecSize,
+                                       bool channelDimScalableFlag,
+                                       bool flatten) {
     if (!valid)
       return rewriter.notifyMatchFailure(op, "unvectorizable depthwise conv");
 
+    bool scalableChDim = false;
+    bool useMasking = false;
     int64_t nSize, wSize, cSize, kwSize;
     // kernel{kw, c}
     bindShapeDims(rhsShapedType, kwSize, cSize);
+    if (ShapedType::isDynamic(cSize)) {
+      assert(channelDimVecSize != 0 && "Channel dim vec size must be > 0");
+      cSize = channelDimVecSize;
+      // Scalable vectors are only used when both conditions are met:
+      //  1. channel dim is dynamic
+      //  2. channelDimScalableFlag is set
+      scalableChDim = channelDimScalableFlag;
+      useMasking = true;
+    }
+
+    assert(!(useMasking && flatten) &&
+           "Unsupported flattened conv with dynamic shapes");
+
     // out{n, w, c}
     bindShapeDims(resShapedType, nSize, wSize);
 
@@ -3158,20 +3216,51 @@ struct Conv1DGenerator
          //   (i.e. 16 convolved with 3 (@stride 1 dilation 1) -> 14)
          ((wSize - 1) * strideW + 1) + ((kwSize - 1) * dilationW + 1) - 1,
          cSize},
-        lhsEltType);
-    VectorType rhsType = VectorType::get({kwSize, cSize}, rhsEltType);
-    VectorType resType = VectorType::get({nSize, wSize, cSize}, resEltType);
+        lhsEltType, /*scalableDims=*/{false, false, scalableChDim});
+    VectorType rhsType =
+        VectorType::get({kwSize, cSize}, rhsEltType,
+                        /*scalableDims=*/{false, scalableChDim});
+    VectorType resType =
+        VectorType::get({nSize, wSize, cSize}, resEltType,
+                        /*scalableDims=*/{false, false, scalableChDim});
+
+    // Masks the input xfer Op along the channel dim, iff the corresponding
+    // scalable flag is set.
+    auto maybeMaskXferOp = [&](ArrayRef<int64_t> maskShape,
+                               ArrayRef<bool> scalableDims,
+                               Operation *opToMask) {
+      if (!useMasking)
+        return opToMask;
+      auto maskType =
+          VectorType::get(maskShape, rewriter.getI1Type(), scalableDims);
+
+      SmallVector<OpFoldResult> mixedDims = vector::getMixedSizesXfer(
+          cast<LinalgOp>(op).hasPureTensorSemantics(), opToMask, rewriter);
+
+      Value maskOp =
+          rewriter.create<vector::CreateMaskOp>(loc, maskType, mixedDims);
+
+      return mlir::vector::maskOperation(rewriter, opToMask, maskOp);
+    };
 
     // Read lhs slice of size {n, w * strideW + kw * dilationW, c} @ [0, 0,
     // 0].
     Value lhs = rewriter.create<vector::TransferReadOp>(
         loc, lhsType, lhsShaped, ValueRange{zero, zero, zero});
+    auto maybeMaskedLhs = maybeMaskXferOp(
+        lhsType.getShape(), lhsType.getScalableDims(), lhs.getDefiningOp());
+
     // Read rhs slice of size {kw, c} @ [0, 0].
     Value rhs = rewriter.create<vector::TransferReadOp>(loc, rhsType, rhsShaped,
                                                         ValueRange{zero, zero});
+    auto maybeMaskedRhs = maybeMaskXferOp(
+        rhsType.getShape(), rhsType.getScalableDims(), rhs.getDefiningOp());
+
     // Read res slice of size {n, w, c} @ [0, 0, 0].
     Value res = rewriter.create<vector::TransferReadOp>(
         loc, resType, resShaped, ValueRange{zero, zero, zero});
+    auto maybeMaskedRes = maybeMaskXferOp(
+        resType.getShape(), resType.getScalableDims(), res.getDefiningOp());
 
     //===------------------------------------------------------------------===//
     // Begin vector-only rewrite part
@@ -3186,7 +3275,7 @@ struct Conv1DGenerator
     for (int64_t kw = 0; kw < kwSize; ++kw) {
       for (int64_t w = 0; w < wSize; w += wSizeStep) {
         lhsVals.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
-            loc, lhs,
+            loc, maybeMaskedLhs->getResult(0),
             /*offsets=*/ArrayRef<int64_t>{0, w * strideW + kw * dilationW, 0},
             inOutSliceSizes, inOutStrides));
       }
@@ -3194,12 +3283,13 @@ struct Conv1DGenerator
     // Extract rhs slice of size {c} @ [kw].
     for (int64_t kw = 0; kw < kwSize; ++kw) {
       rhsVals.push_back(rewriter.create<vector::ExtractOp>(
-          loc, rhs, /*offsets=*/ArrayRef<int64_t>{kw}));
+          loc, maybeMaskedRhs->getResult(0),
+          /*offsets=*/ArrayRef<int64_t>{kw}));
     }
     // Extract res slice: {n, wSizeStep, c} @ [0, w, 0].
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
       resVals.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
-          loc, res,
+          loc, maybeMaskedRes->getResult(0),
           /*offsets=*/ArrayRef<int64_t>{0, w, 0}, inOutSliceSizes,
           inOutStrides));
     }
@@ -3208,10 +3298,15 @@ struct Conv1DGenerator
       return kw * (wSize / wSizeStep) + w;
     };
 
+    // Note - the scalable flags are ignored as flattening combined with
+    // scalable vectorization is not supported.
     auto inOutFlattenSliceSizes =
         SmallVector<int64_t>{nSize, wSizeStep * cSize};
-    auto lhsCastType = VectorType::get(inOutFlattenSliceSizes, lhsEltType);
-    auto resCastType = VectorType::get(inOutFlattenSliceSizes, resEltType);
+    auto lhsTypeAfterFlattening =
+        VectorType::get(inOutFlattenSliceSizes, lhsEltType);
+    auto resTypeAfterFlattening =
+        VectorType::get(inOutFlattenSliceSizes, resEltType);
+
     // Compute contraction: O{n, w, c} += I{n, sw * w + dw * kw, c} * F{c}
     for (int64_t kw = 0; kw < kwSize; ++kw) {
       for (int64_t w = 0; w < wSize; w += wSizeStep) {
@@ -3221,9 +3316,9 @@ struct Conv1DGenerator
           // Flatten the input and output vectors (collapse the channel
           // dimension)
           lhsVal = rewriter.create<vector::ShapeCastOp>(
-              loc, lhsCastType, lhsVals[linearIndex(kw, w)]);
-          resVal = rewriter.create<vector::ShapeCastOp>(loc, resCastType,
-                                                        resVals[w]);
+              loc, lhsTypeAfterFlattening, lhsVals[linearIndex(kw, w)]);
+          resVal = rewriter.create<vector::ShapeCastOp>(
+              loc, resTypeAfterFlattening, resVals[w]);
         }
         resVals[w] = depthwiseConv1dSliceAsMulAcc(rewriter, loc, lhsVal,
                                                   rhsVals[kw], resVal, flatten);
@@ -3248,8 +3343,8 @@ struct Conv1DGenerator
     // Write back res slice: {n, wSizeStep, c} @ [0, w, 0].
     // This does not depend on kw.
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
-      res = rewriter.create<vector::InsertStridedSliceOp>(
-          loc, resVals[w], res,
+      maybeMaskedRes = rewriter.create<vector::InsertStridedSliceOp>(
+          loc, resVals[w], maybeMaskedRes->getResult(0),
           /*offsets=*/ArrayRef<int64_t>{0, w, 0},
           /*strides=*/ArrayRef<int64_t>{1, 1, 1});
     }
@@ -3258,10 +3353,11 @@ struct Conv1DGenerator
     //===------------------------------------------------------------------===//
 
     // Write back res slice of size {n, w, c} @ [0, 0, 0].
-    return rewriter
-        .create<vector::TransferWriteOp>(loc, res, resShaped,
-                                         ValueRange{zero, zero, zero})
-        .getOperation();
+    Operation *resOut = rewriter.create<vector::TransferWriteOp>(
+        loc, maybeMaskedRes->getResult(0), resShaped,
+        ValueRange{zero, zero, zero});
+    return maybeMaskXferOp(resType.getShape(), resType.getScalableDims(),
+                           resOut);
   }
 
   /// Lower:
@@ -3278,6 +3374,10 @@ struct Conv1DGenerator
     lhs = promote(rewriter, loc, lhs, resTy);
 
     if (flatten) {
+      // NOTE: This following logic won't work for scalable vectors. For this
+      // reason, "flattening" is not supported when shapes are dynamic (this
+      // should be captured by one of the pre-conditions).
+
       // There are two options for handling the filter:
       //  * shape_cast(broadcast(filter))
       //  * broadcast(shuffle(filter))
@@ -3399,7 +3499,9 @@ struct Conv1DGenerator
 
   /// Entry point that transposes into the common form:
   ///   {{n, strideW * w + dilationW * kw, c}, {kw, c}, {n, w, c}}
-  FailureOr<Operation *> generateDilatedConv(bool flatten = false) {
+  FailureOr<Operation *> generateDilatedConv(uint64_t vecChDimSize = 0,
+                                             bool vecChDimScalableFlag = false,
+                                             bool flatten = false) {
     AffineExpr n, w, c, kw;
     bindDims(ctx, n, w, c, kw);
     if (!iters({Par(), Par(), Par(), Red()}))
@@ -3410,7 +3512,7 @@ struct Conv1DGenerator
     if (layout({/*lhsIndex*/ {n, strideW * w + dilationW * kw, c},
                 /*rhsIndex*/ {kw, c},
                 /*resIndex*/ {n, w, c}}))
-      return depthwiseConv(flatten);
+      return depthwiseConv(vecChDimSize, vecChDimScalableFlag, flatten);
 
     return rewriter.notifyMatchFailure(op, "not a depthwise::Nwc layout");
   }
@@ -3475,9 +3577,9 @@ private:
 
 /// Helper function to vectorize a LinalgOp with convolution semantics.
 // TODO: extend the generic vectorization to support windows and drop this.
-static FailureOr<Operation *>
-vectorizeConvolution(RewriterBase &rewriter, LinalgOp op,
-                     bool flatten1DDepthwiseConv) {
+static FailureOr<Operation *> vectorizeConvolution(
+    RewriterBase &rewriter, LinalgOp op, ArrayRef<int64_t> inputVecSizes,
+    ArrayRef<bool> inputScalableVecDims, bool flatten1DDepthwiseConv) {
   // The ConvolutionOpInterface gives us guarantees of existence for
   // strides/dilations. However, we do not need to rely on those, we can
   // simply use them if present, otherwise use the default and let the generic
@@ -3502,7 +3604,28 @@ vectorizeConvolution(RewriterBase &rewriter, LinalgOp op,
   res = e.generateNcwPooling();
   if (succeeded(res))
     return res;
-  return e.generateDilatedConv(flatten1DDepthwiseConv);
+
+  // Only depthwise 1D NWC convs are left - these can be vectorized using masks
+  // and scalable vectors. Note that ATM the only dim that can be dynamic (i.e.
+  // masked/scalable) is the channel dim (i.e. the trailing dim).
+  uint64_t vecChDimSize = ShapedType::kDynamic;
+  bool vecChDimScalableFlag = false;
+  if (!inputVecSizes.empty()) {
+    // Only use the input vector size corresponding to the channel dim. Other
+    // vector dims will be inferred from the Ops.
+    assert((isa<linalg::DepthwiseConv1DNwcWcOp>(*op) ||
+            isa<linalg::DepthwiseConv1DNcwCwOp>(*op)) &&
+           "Not a 1D depthwise conv!");
+    size_t chDimIdx =
+        TypeSwitch<Operation *, size_t>(op)
+            .Case<linalg::DepthwiseConv1DNwcWcOp>([](auto conv) { return 2; })
+            .Case<linalg::DepthwiseConv1DNcwCwOp>([](auto conv) { return 1; });
+
+    vecChDimSize = inputVecSizes[chDimIdx];
+    vecChDimScalableFlag = inputScalableVecDims[chDimIdx];
+  }
+  return e.generateDilatedConv(vecChDimSize, vecChDimScalableFlag,
+                               flatten1DDepthwiseConv);
 }
 
 struct VectorizeConvolution : public OpInterfaceRewritePattern<LinalgOp> {
diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 962cb28..428c1c3 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -39,14 +39,24 @@ using namespace mlir;
 using namespace mlir::math;
 using namespace mlir::vector;
 
+// Helper to encapsulate a vector's shape (including scalable dims).
+struct VectorShape {
+  ArrayRef<int64_t> sizes;
+  ArrayRef<bool> scalableFlags;
+
+  bool empty() const { return sizes.empty(); }
+};
+
 // Returns vector shape if the type is a vector. Returns an empty shape if it is
 // not a vector.
-static ArrayRef<int64_t> vectorShape(Type type) {
+static VectorShape vectorShape(Type type) {
   auto vectorType = dyn_cast<VectorType>(type);
-  return vectorType ? vectorType.getShape() : ArrayRef<int64_t>();
+  return vectorType
+             ? VectorShape{vectorType.getShape(), vectorType.getScalableDims()}
+             : VectorShape{};
 }
 
-static ArrayRef<int64_t> vectorShape(Value value) {
+static VectorShape vectorShape(Value value) {
   return vectorShape(value.getType());
 }
 
@@ -55,14 +65,16 @@ static ArrayRef<int64_t> vectorShape(Value value) {
 //----------------------------------------------------------------------------//
 
 // Broadcasts scalar type into vector type (iff shape is non-scalar).
-static Type broadcast(Type type, ArrayRef<int64_t> shape) {
+static Type broadcast(Type type, VectorShape shape) {
   assert(!isa<VectorType>(type) && "must be scalar type");
-  return !shape.empty() ? VectorType::get(shape, type) : type;
+  return !shape.empty()
+             ? VectorType::get(shape.sizes, type, shape.scalableFlags)
+             : type;
 }
 
 // Broadcasts scalar value into vector (iff shape is non-scalar).
 static Value broadcast(ImplicitLocOpBuilder &builder, Value value,
-                       ArrayRef<int64_t> shape) {
+                       VectorShape shape) {
   assert(!isa<VectorType>(value.getType()) && "must be scalar value");
   auto type = broadcast(value.getType(), shape);
   return !shape.empty() ? builder.create<BroadcastOp>(type, value) : value;
@@ -215,7 +227,7 @@ static Value clamp(ImplicitLocOpBuilder &builder, Value value, Value lowerBound,
 static std::pair<Value, Value> frexp(ImplicitLocOpBuilder &builder, Value arg,
                                      bool isPositive = false) {
   assert(getElementTypeOrSelf(arg).isF32() && "arg must be f32 type");
-  ArrayRef<int64_t> shape = vectorShape(arg);
+  VectorShape shape = vectorShape(arg);
 
   auto bcast = [&](Value value) -> Value {
     return broadcast(builder, value, shape);
@@ -255,7 +267,7 @@ static std::pair<Value, Value> frexp(ImplicitLocOpBuilder &builder, Value arg,
 // Computes exp2 for an i32 argument.
 static Value exp2I32(ImplicitLocOpBuilder &builder, Value arg) {
   assert(getElementTypeOrSelf(arg).isInteger(32) && "arg must be i32 type");
-  ArrayRef<int64_t> shape = vectorShape(arg);
+  VectorShape shape = vectorShape(arg);
 
   auto bcast = [&](Value value) -> Value {
     return broadcast(builder, value, shape);
@@ -281,7 +293,7 @@ Value makePolynomialCalculation(ImplicitLocOpBuilder &builder,
   Type elementType = getElementTypeOrSelf(x);
   assert((elementType.isF32() || elementType.isF16()) &&
          "x must be f32 or f16 type");
-  ArrayRef<int64_t> shape = vectorShape(x);
+  VectorShape shape = vectorShape(x);
 
   if (coeffs.empty())
     return broadcast(builder, floatCst(builder, 0.0f, elementType), shape);
@@ -379,7 +391,7 @@ AtanApproximation::matchAndRewrite(math::AtanOp op,
   if (!getElementTypeOrSelf(operand).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   Value abs = builder.create<math::AbsFOp>(operand);
@@ -478,7 +490,7 @@ Atan2Approximation::matchAndRewrite(math::Atan2Op op,
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
-  ArrayRef<int64_t> shape = vectorShape(op.getResult());
+  VectorShape shape = vectorShape(op.getResult());
 
   // Compute atan in the valid range.
   auto div = builder.create<arith::DivFOp>(y, x);
@@ -544,7 +556,7 @@ TanhApproximation::matchAndRewrite(math::TanhOp op,
   if (!getElementTypeOrSelf(op.getOperand()).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   auto bcast = [&](Value value) -> Value {
@@ -632,7 +644,7 @@ LogApproximationBase<Op>::logMatchAndRewrite(Op op, PatternRewriter &rewriter,
   if (!getElementTypeOrSelf(op.getOperand()).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   auto bcast = [&](Value value) -> Value {
@@ -779,7 +791,7 @@ Log1pApproximation::matchAndRewrite(math::Log1pOp op,
   if (!getElementTypeOrSelf(op.getOperand()).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   auto bcast = [&](Value value) -> Value {
@@ -829,7 +841,7 @@ ErfPolynomialApproximation::matchAndRewrite(math::ErfOp op,
   if (!(elementType.isF32() || elementType.isF16()))
     return rewriter.notifyMatchFailure(op,
                                        "only f32 and f16 type is supported.");
-  ArrayRef<int64_t> shape = vectorShape(operand);
+  VectorShape shape = vectorShape(operand);
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   auto bcast = [&](Value value) -> Value {
@@ -938,9 +950,8 @@ ErfPolynomialApproximation::matchAndRewrite(math::ErfOp op,
 
 namespace {
 
-Value clampWithNormals(ImplicitLocOpBuilder &builder,
-                       const llvm::ArrayRef<int64_t> shape, Value value,
-                       float lowerBound, float upperBound) {
+Value clampWithNormals(ImplicitLocOpBuilder &builder, const VectorShape shape,
+                       Value value, float lowerBound, float upperBound) {
   assert(!std::isnan(lowerBound));
   assert(!std::isnan(upperBound));
 
@@ -1131,7 +1142,7 @@ ExpM1Approximation::matchAndRewrite(math::ExpM1Op op,
   if (!getElementTypeOrSelf(op.getOperand()).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   auto bcast = [&](Value value) -> Value {
@@ -1201,7 +1212,7 @@ LogicalResult SinAndCosApproximation<isSine, OpTy>::matchAndRewrite(
   if (!getElementTypeOrSelf(op.getOperand()).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
   auto bcast = [&](Value value) -> Value {
@@ -1328,7 +1339,7 @@ CbrtApproximation::matchAndRewrite(math::CbrtOp op,
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
-  ArrayRef<int64_t> shape = vectorShape(operand);
+  VectorShape shape = vectorShape(operand);
 
   Type floatTy = getElementTypeOrSelf(operand.getType());
   Type intTy = b.getIntegerType(floatTy.getIntOrFloatBitWidth());
@@ -1417,10 +1428,10 @@ RsqrtApproximation::matchAndRewrite(math::RsqrtOp op,
   if (!getElementTypeOrSelf(op.getOperand()).isF32())
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
-  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+  VectorShape shape = vectorShape(op.getOperand());
 
   // Only support already-vectorized rsqrt's.
-  if (shape.empty() || shape.back() % 8 != 0)
+  if (shape.empty() || shape.sizes.back() % 8 != 0)
     return rewriter.notifyMatchFailure(op, "unsupported operand type");
 
   ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp
index a942a72..7734d1d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -187,6 +187,35 @@ struct DisassembleOpInterface
   }
 };
 
+struct ForeachOpInterface : public SparseBufferizableOpInterfaceExternalModel<
+                                ForeachOpInterface, sparse_tensor::ForeachOp> {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const AnalysisState &state) const {
+    return true;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const AnalysisState &state) const {
+    return false;
+  }
+
+  AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
+                                      const AnalysisState &state) const {
+    return {};
+  }
+
+  LogicalResult verifyAnalysis(Operation *op,
+                               const AnalysisState &state) const {
+    // A more complex analysis (similar to scf.for) is needed if the op returns
+    // a tensor. That tensor would have to be bufferized (not implemented yet).
+    for (OpResult result : op->getResults()) {
+      if (isa<TensorType>(result.getType()))
+        return op->emitOpError("tensor results are not supported yet");
+    }
+    return success();
+  }
+};
+
 struct NumberOfEntriesOpInterface
     : public SparseBufferizableOpInterfaceExternalModel<
           NumberOfEntriesOpInterface, sparse_tensor::NumberOfEntriesOp> {
@@ -307,6 +336,7 @@ void mlir::sparse_tensor::registerBufferizableOpInterfaceExternalModels(
         NumberOfEntriesOpInterface>(*ctx);
     sparse_tensor::AssembleOp::attachInterface<AssembleOpInterface>(*ctx);
     sparse_tensor::DisassembleOp::attachInterface<DisassembleOpInterface>(*ctx);
+    sparse_tensor::ForeachOp::attachInterface<ForeachOpInterface>(*ctx);
     sparse_tensor::ToCoordinatesBufferOp::attachInterface<
         ToCoordinatesBufferOpInterface>(*ctx);
     sparse_tensor::ToCoordinatesOp::attachInterface<ToCoordinatesOpInterface>(
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 7ff2fc2..5679f27 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -1561,6 +1561,19 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
   }
 };
 
+struct SparseHasRuntimeLibraryConverter
+    : public OpConversionPattern<HasRuntimeLibraryOp> {
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(HasRuntimeLibraryOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto i1Type = rewriter.getI1Type();
+    rewriter.replaceOpWithNewOp<arith::ConstantOp>(
+        op, i1Type, rewriter.getIntegerAttr(i1Type, 0));
+    return success();
+  }
+};
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -1572,21 +1585,21 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
 void mlir::populateSparseTensorCodegenPatterns(
     TypeConverter &typeConverter, RewritePatternSet &patterns,
     bool createSparseDeallocs, bool enableBufferInitialization) {
-  patterns.add<SparseAssembleOpConverter, SparseDisassembleOpConverter,
-               SparseReturnConverter, SparseCallConverter, SparseLvlOpConverter,
-               SparseCastConverter, SparseExtractSliceConverter,
-               SparseTensorLoadConverter, SparseExpandConverter,
-               SparseCompressConverter, SparseInsertConverter,
-               SparseReorderCOOConverter, SparseReMapConverter,
-               SparseSliceGetterOpConverter<ToSliceOffsetOp,
-                                            StorageSpecifierKind::DimOffset>,
-               SparseSliceGetterOpConverter<ToSliceStrideOp,
-                                            StorageSpecifierKind::DimStride>,
-               SparseToPositionsConverter, SparseToCoordinatesConverter,
-               SparseToCoordinatesBufferConverter, SparseToValuesConverter,
-               SparseConvertConverter, SparseNewConverter,
-               SparseNumberOfEntriesConverter>(typeConverter,
-                                               patterns.getContext());
+  patterns.add<
+      SparseAssembleOpConverter, SparseDisassembleOpConverter,
+      SparseReturnConverter, SparseCallConverter, SparseLvlOpConverter,
+      SparseCastConverter, SparseExtractSliceConverter,
+      SparseTensorLoadConverter, SparseExpandConverter, SparseCompressConverter,
+      SparseInsertConverter, SparseReorderCOOConverter, SparseReMapConverter,
+      SparseSliceGetterOpConverter<ToSliceOffsetOp,
+                                   StorageSpecifierKind::DimOffset>,
+      SparseSliceGetterOpConverter<ToSliceStrideOp,
+                                   StorageSpecifierKind::DimStride>,
+      SparseToPositionsConverter, SparseToCoordinatesConverter,
+      SparseToCoordinatesBufferConverter, SparseToValuesConverter,
+      SparseConvertConverter, SparseNewConverter,
+      SparseNumberOfEntriesConverter, SparseHasRuntimeLibraryConverter>(
+      typeConverter, patterns.getContext());
   patterns.add<SparseTensorDeallocConverter>(
       typeConverter, patterns.getContext(), createSparseDeallocs);
   patterns.add<SparseTensorAllocConverter, SparseTensorEmptyConverter>(
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 0937c10..92c98b3 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -840,6 +840,19 @@ public:
   }
 };
 
+struct SparseHasRuntimeLibraryConverter
+    : public OpConversionPattern<HasRuntimeLibraryOp> {
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(HasRuntimeLibraryOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto i1Type = rewriter.getI1Type();
+    rewriter.replaceOpWithNewOp<arith::ConstantOp>(
+        op, i1Type, rewriter.getIntegerAttr(i1Type, 1));
+    return success();
+  }
+};
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -868,6 +881,7 @@ void mlir::populateSparseTensorConversionPatterns(TypeConverter &typeConverter,
            SparseTensorToValuesConverter, SparseNumberOfEntriesConverter,
            SparseTensorLoadConverter, SparseTensorInsertConverter,
            SparseTensorExpandConverter, SparseTensorCompressConverter,
-           SparseTensorAssembleConverter, SparseTensorDisassembleConverter>(
-          typeConverter, patterns.getContext());
+           SparseTensorAssembleConverter, SparseTensorDisassembleConverter,
+           SparseHasRuntimeLibraryConverter>(typeConverter,
+                                             patterns.getContext());
 }
diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp
index 71a9d61..fe2eea5 100644
--- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp
@@ -1278,14 +1278,11 @@ void transform::TrackingListener::notifyMatchFailure(
 }
 
 void transform::TrackingListener::notifyOperationErased(Operation *op) {
-  // TODO: Walk can be removed when D144193 has landed.
-  op->walk([&](Operation *op) {
-    // Remove mappings for result values.
-    for (OpResult value : op->getResults())
-      (void)replacePayloadValue(value, nullptr);
-    // Remove mapping for op.
-    (void)replacePayloadOp(op, nullptr);
-  });
+  // Remove mappings for result values.
+  for (OpResult value : op->getResults())
+    (void)replacePayloadValue(value, nullptr);
+  // Remove mapping for op.
+  (void)replacePayloadOp(op, nullptr);
 }
 
 void transform::TrackingListener::notifyOperationReplaced(
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index d613672..63ed094 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -300,3 +300,20 @@ vector::createUnrollIterator(VectorType vType, int64_t targetRank) {
   shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
   return StaticTileOffsetRange(shapeToUnroll, /*unrollStep=*/1);
 }
+
+SmallVector<OpFoldResult> vector::getMixedSizesXfer(bool hasTensorSemantics,
+                                                    Operation *xfer,
+                                                    RewriterBase &rewriter) {
+  auto loc = xfer->getLoc();
+
+  Value base = TypeSwitch<Operation *, Value>(xfer)
+                   .Case<vector::TransferReadOp>(
+                       [&](auto readOp) { return readOp.getSource(); })
+                   .Case<vector::TransferWriteOp>(
+                       [&](auto writeOp) { return writeOp.getOperand(1); });
+
+  SmallVector<OpFoldResult> mixedSourceDims =
+      hasTensorSemantics ? tensor::getMixedSizes(rewriter, loc, base)
+                         : memref::getMixedSizes(rewriter, loc, base);
+  return mixedSourceDims;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 731abcb..8835056 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -127,7 +127,7 @@ extern "C" {
     case Action::kPack: {                                                      \
       assert(ptr && "Received nullptr for SparseTensorStorage object");        \
       intptr_t *buffers = static_cast<intptr_t *>(ptr);                        \
-      return SparseTensorStorage<P, C, V>::packFromLvlBuffers(                 \
+      return SparseTensorStorage<P, C, V>::newFromBuffers(                     \
           dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,    \
           dimRank, buffers);                                                   \
     }                                                                          \
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ee0214f..f05b4f5 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -21,22 +21,20 @@ using namespace mlir;
 LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                             ChunkBufferHandler processChunkBuffer,
-                            raw_ostream &os, bool enableSplitting,
-                            bool insertMarkerInOutput) {
+                            raw_ostream &os, llvm::StringRef inputSplitMarker,
+                            llvm::StringRef outputSplitMarker) {
   // If splitting is disabled, we process the full input buffer.
-  if (!enableSplitting)
+  if (inputSplitMarker.empty())
     return processChunkBuffer(std::move(originalBuffer), os);
 
-  const char splitMarkerConst[] = "// -----";
-  StringRef splitMarker(splitMarkerConst);
-  const int splitMarkerLen = splitMarker.size();
+  const int inputSplitMarkerLen = inputSplitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
   SmallVector<StringRef, 8> rawSourceBuffers;
   const int checkLen = 2;
   // Split dropping the last checkLen chars to enable flagging near misses.
   origMemBuffer->getBuffer().split(rawSourceBuffers,
-                                   splitMarker.drop_back(checkLen));
+                                   inputSplitMarker.drop_back(checkLen));
   if (rawSourceBuffers.empty())
     return success();
 
@@ -58,8 +56,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
     }
 
     // Check that suffix is as expected and doesn't have any dash post.
-    bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
-                          buffer.size() > checkLen && buffer[checkLen] != '0';
+    bool expectedSuffix =
+        buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
+        buffer.size() > checkLen && buffer[checkLen] != '0';
     if (expectedSuffix) {
       sourceBuffers.push_back(prev);
       prev = buffer.drop_front(checkLen);
@@ -69,8 +68,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
                                  llvm::SourceMgr::DK_Warning,
                                  "near miss with file split marker");
-      prev = StringRef(prev.data(),
-                       prev.size() + splitMarkerLen - checkLen + buffer.size());
+      prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
+                                        checkLen + buffer.size());
     }
   }
   if (!prev.empty())
@@ -89,7 +88,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       hadFailure = true;
   };
   llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput ? "\n// -----\n" : "");
+                   (llvm::Twine(outputSplitMarker) + "\n").str());
 
   // If any fails, then return a failure of the tool.
   return failure(hadFailure);
diff --git a/mlir/lib/Target/Cpp/CMakeLists.txt b/mlir/lib/Target/Cpp/CMakeLists.txt
index d8f372c..578cb2f 100644
--- a/mlir/lib/Target/Cpp/CMakeLists.txt
+++ b/mlir/lib/Target/Cpp/CMakeLists.txt
@@ -10,8 +10,6 @@ add_mlir_translation_library(MLIRTargetCpp
   MLIREmitCDialect
   MLIRFuncDialect
   MLIRIR
-  MLIRMathDialect
-  MLIRSCFDialect
   MLIRSupport
   MLIRTranslateLib
   )
diff --git a/mlir/lib/Target/Cpp/TranslateRegistration.cpp b/mlir/lib/Target/Cpp/TranslateRegistration.cpp
index 4104b17..1aa9883 100644
--- a/mlir/lib/Target/Cpp/TranslateRegistration.cpp
+++ b/mlir/lib/Target/Cpp/TranslateRegistration.cpp
@@ -9,8 +9,6 @@
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/Math/IR/Math.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/Target/Cpp/CppEmitter.h"
@@ -42,9 +40,7 @@ void registerToCppTranslation() {
         // clang-format off
         registry.insert<cf::ControlFlowDialect,
                         emitc::EmitCDialect,
-                        func::FuncDialect,
-                        math::MathDialect,
-                        scf::SCFDialect>();
+                        func::FuncDialect>();
         // clang-format on
       });
 }
diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 7cbb1e9..bc49d7cd 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -18,11 +18,13 @@
 #include "mlir/Support/LLVM.h"
 #include "mlir/Target/Cpp/CppEmitter.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
+#include <stack>
 #include <utility>
 
 #define DEBUG_TYPE "translate-to-cpp"
@@ -172,6 +174,9 @@ struct CppEmitter {
   /// Return the existing or a new name for a Value.
   StringRef getOrCreateName(Value val);
 
+  // Returns the textual representation of a subscript operation.
+  std::string getSubscriptName(emitc::SubscriptOp op);
+
   /// Return the existing or a new label of a Block.
   StringRef getOrCreateName(Block &block);
 
@@ -341,8 +346,7 @@ static LogicalResult printOperation(CppEmitter &emitter,
 
 static LogicalResult printOperation(CppEmitter &emitter,
                                     emitc::AssignOp assignOp) {
-  auto variableOp = cast<emitc::VariableOp>(assignOp.getVar().getDefiningOp());
-  OpResult result = variableOp->getResult(0);
+  OpResult result = assignOp.getVar().getDefiningOp()->getResult(0);
 
   if (failed(emitter.emitVariableAssignment(result)))
     return failure();
@@ -350,6 +354,13 @@ static LogicalResult printOperation(CppEmitter &emitter,
   return emitter.emitOperand(assignOp.getValue());
 }
 
+static LogicalResult printOperation(CppEmitter &emitter,
+                                    emitc::SubscriptOp subscriptOp) {
+  // Add name to cache so that `hasValueInScope` works.
+  emitter.getOrCreateName(subscriptOp.getResult());
+  return success();
+}
+
 static LogicalResult printBinaryOperation(CppEmitter &emitter,
                                           Operation *operation,
                                           StringRef binaryOperator) {
@@ -1091,12 +1102,28 @@ CppEmitter::CppEmitter(raw_ostream &os, bool declareVariablesAtTop)
   labelInScopeCount.push(0);
 }
 
+std::string CppEmitter::getSubscriptName(emitc::SubscriptOp op) {
+  std::string out;
+  llvm::raw_string_ostream ss(out);
+  ss << getOrCreateName(op.getArray());
+  for (auto index : op.getIndices()) {
+    ss << "[" << getOrCreateName(index) << "]";
+  }
+  return out;
+}
+
 /// Return the existing or a new name for a Value.
 StringRef CppEmitter::getOrCreateName(Value val) {
   if (auto literal = dyn_cast_if_present<emitc::LiteralOp>(val.getDefiningOp()))
     return literal.getValue();
-  if (!valueMapper.count(val))
-    valueMapper.insert(val, formatv("v{0}", ++valueInScopeCount.top()));
+  if (!valueMapper.count(val)) {
+    if (auto subscript =
+            dyn_cast_if_present<emitc::SubscriptOp>(val.getDefiningOp())) {
+      valueMapper.insert(val, getSubscriptName(subscript));
+    } else {
+      valueMapper.insert(val, formatv("v{0}", ++valueInScopeCount.top()));
+    }
+  }
   return *valueMapper.begin(val);
 }
 
@@ -1336,6 +1363,8 @@ LogicalResult CppEmitter::emitVariableAssignment(OpResult result) {
 
 LogicalResult CppEmitter::emitVariableDeclaration(OpResult result,
                                                   bool trailingSemicolon) {
+  if (isa<emitc::SubscriptOp>(result.getDefiningOp()))
+    return success();
   if (hasValueInScope(result)) {
     return result.getDefiningOp()->emitError(
         "result variable for the operation already declared");
@@ -1411,7 +1440,7 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
                 emitc::DivOp, emitc::ExpressionOp, emitc::ForOp, emitc::FuncOp,
                 emitc::IfOp, emitc::IncludeOp, emitc::LogicalAndOp,
                 emitc::LogicalNotOp, emitc::LogicalOrOp, emitc::MulOp,
-                emitc::RemOp, emitc::ReturnOp, emitc::SubOp,
+                emitc::RemOp, emitc::ReturnOp, emitc::SubOp, emitc::SubscriptOp,
                 emitc::UnaryMinusOp, emitc::UnaryPlusOp, emitc::VariableOp,
                 emitc::VerbatimOp>(
               [&](auto op) { return printOperation(*this, op); })
@@ -1426,7 +1455,7 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
   if (failed(status))
     return failure();
 
-  if (isa<emitc::LiteralOp>(op))
+  if (isa<emitc::LiteralOp, emitc::SubscriptOp>(op))
     return success();
 
   if (getEmittedExpression() ||
diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp
index c631617..5ba90bb 100644
--- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp
+++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp
@@ -13,6 +13,7 @@
 #include "mlir/IR/Location.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -51,10 +52,9 @@ DICompileUnitAttr DebugImporter::translateImpl(llvm::DICompileUnit *node) {
   std::optional<DIEmissionKind> emissionKind =
       symbolizeDIEmissionKind(node->getEmissionKind());
   return DICompileUnitAttr::get(
-      context, DistinctAttr::create(UnitAttr::get(context)),
-      node->getSourceLanguage(), translate(node->getFile()),
-      getStringAttrOrNull(node->getRawProducer()), node->isOptimized(),
-      emissionKind.value());
+      context, getOrCreateDistinctID(node), node->getSourceLanguage(),
+      translate(node->getFile()), getStringAttrOrNull(node->getRawProducer()),
+      node->isOptimized(), emissionKind.value());
 }
 
 DICompositeTypeAttr DebugImporter::translateImpl(llvm::DICompositeType *node) {
@@ -64,11 +64,7 @@ DICompositeTypeAttr DebugImporter::translateImpl(llvm::DICompositeType *node) {
     assert(element && "expected a non-null element type");
     elements.push_back(translate(element));
   }
-  // Drop the elements parameter if a cyclic dependency is detected. We
-  // currently cannot model these cycles and thus drop the parameter if
-  // required. A cyclic dependency is detected if one of the element nodes
-  // translates to a nullptr since the node is already on the translation stack.
-  // TODO: Support debug metadata with cyclic dependencies.
+  // Drop the elements parameter if any of the elements are invalid.
   if (llvm::is_contained(elements, nullptr))
     elements.clear();
   DITypeAttr baseType = translate(node->getBaseType());
@@ -77,14 +73,15 @@ DICompositeTypeAttr DebugImporter::translateImpl(llvm::DICompositeType *node) {
   if (node->getTag() == llvm::dwarf::DW_TAG_array_type && !baseType)
     return nullptr;
   return DICompositeTypeAttr::get(
-      context, node->getTag(), getStringAttrOrNull(node->getRawName()),
-      translate(node->getFile()), node->getLine(), translate(node->getScope()),
-      baseType, flags.value_or(DIFlags::Zero), node->getSizeInBits(),
+      context, node->getTag(), /*recId=*/{},
+      getStringAttrOrNull(node->getRawName()), translate(node->getFile()),
+      node->getLine(), translate(node->getScope()), baseType,
+      flags.value_or(DIFlags::Zero), node->getSizeInBits(),
       node->getAlignInBits(), elements);
 }
 
 DIDerivedTypeAttr DebugImporter::translateImpl(llvm::DIDerivedType *node) {
-  // Return nullptr if the base type is a cyclic dependency.
+  // Return nullptr if the base type is invalid.
   DITypeAttr baseType = translate(node->getBaseType());
   if (node->getBaseType() && !baseType)
     return nullptr;
@@ -179,10 +176,10 @@ DISubprogramAttr DebugImporter::translateImpl(llvm::DISubprogram *node) {
   // Only definitions require a distinct identifier.
   mlir::DistinctAttr id;
   if (node->isDistinct())
-    id = DistinctAttr::create(UnitAttr::get(context));
+    id = getOrCreateDistinctID(node);
   std::optional<DISubprogramFlags> subprogramFlags =
       symbolizeDISubprogramFlags(node->getSubprogram()->getSPFlags());
-  // Return nullptr if the scope or type is a cyclic dependency.
+  // Return nullptr if the scope or type is invalid.
   DIScopeAttr scope = translate(node->getScope());
   if (node->getScope() && !scope)
     return nullptr;
@@ -229,7 +226,7 @@ DebugImporter::translateImpl(llvm::DISubroutineType *node) {
     }
     types.push_back(translate(type));
   }
-  // Return nullptr if any of the types is a cyclic dependency.
+  // Return nullptr if any of the types is invalid.
   if (llvm::is_contained(types, nullptr))
     return nullptr;
   return DISubroutineTypeAttr::get(context, node->getCC(), types);
@@ -247,12 +244,42 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) {
   if (DINodeAttr attr = nodeToAttr.lookup(node))
     return attr;
 
-  // Return nullptr if a cyclic dependency is detected since the same node is
-  // being traversed twice. This check avoids infinite recursion if the debug
-  // metadata contains cycles.
-  if (!translationStack.insert(node))
-    return nullptr;
-  auto guard = llvm::make_scope_exit([&]() { translationStack.pop_back(); });
+  // If the node type is capable of being recursive, check if it's seen before.
+  auto recSelfCtor = getRecSelfConstructor(node);
+  if (recSelfCtor) {
+    // If a cyclic dependency is detected since the same node is being traversed
+    // twice, emit a recursive self type, and mark the duplicate node on the
+    // translationStack so it can emit a recursive decl type.
+    auto [iter, inserted] = translationStack.try_emplace(node, nullptr);
+    if (!inserted) {
+      // The original node may have already been assigned a recursive ID from
+      // a different self-reference. Use that if possible.
+      DistinctAttr recId = iter->second;
+      if (!recId) {
+        recId = DistinctAttr::create(UnitAttr::get(context));
+        iter->second = recId;
+      }
+      unboundRecursiveSelfRefs.back().insert(recId);
+      return cast<DINodeAttr>(recSelfCtor(recId));
+    }
+  }
+
+  unboundRecursiveSelfRefs.emplace_back();
+
+  auto guard = llvm::make_scope_exit([&]() {
+    if (recSelfCtor)
+      translationStack.pop_back();
+
+    // Copy unboundRecursiveSelfRefs down to the previous level.
+    if (unboundRecursiveSelfRefs.size() == 1)
+      assert(unboundRecursiveSelfRefs.back().empty() &&
+             "internal error: unbound recursive self reference at top level.");
+    else
+      unboundRecursiveSelfRefs[unboundRecursiveSelfRefs.size() - 2].insert(
+          unboundRecursiveSelfRefs.back().begin(),
+          unboundRecursiveSelfRefs.back().end());
+    unboundRecursiveSelfRefs.pop_back();
+  });
 
   // Convert the debug metadata if possible.
   auto translateNode = [this](llvm::DINode *node) -> DINodeAttr {
@@ -289,7 +316,19 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) {
     return nullptr;
   };
   if (DINodeAttr attr = translateNode(node)) {
-    nodeToAttr.insert({node, attr});
+    // If this node was marked as recursive, set its recId.
+    if (auto recType = dyn_cast<DIRecursiveTypeAttrInterface>(attr)) {
+      if (DistinctAttr recId = translationStack.lookup(node)) {
+        attr = cast<DINodeAttr>(recType.withRecId(recId));
+        // Remove the unbound recursive ID from the set of unbound self
+        // references in the translation stack.
+        unboundRecursiveSelfRefs.back().erase(recId);
+      }
+    }
+
+    // Only cache fully self-contained nodes.
+    if (unboundRecursiveSelfRefs.back().empty())
+      nodeToAttr.try_emplace(node, attr);
     return attr;
   }
   return nullptr;
@@ -346,3 +385,20 @@ StringAttr DebugImporter::getStringAttrOrNull(llvm::MDString *stringNode) {
     return StringAttr();
   return StringAttr::get(context, stringNode->getString());
 }
+
+DistinctAttr DebugImporter::getOrCreateDistinctID(llvm::DINode *node) {
+  DistinctAttr &id = nodeToDistinctAttr[node];
+  if (!id)
+    id = DistinctAttr::create(UnitAttr::get(context));
+  return id;
+}
+
+function_ref<DIRecursiveTypeAttrInterface(DistinctAttr)>
+DebugImporter::getRecSelfConstructor(llvm::DINode *node) {
+  using CtorType = function_ref<DIRecursiveTypeAttrInterface(DistinctAttr)>;
+  return TypeSwitch<llvm::DINode *, CtorType>(node)
+      .Case([&](llvm::DICompositeType *concreteNode) {
+        return CtorType(DICompositeTypeAttr::getRecSelf);
+      })
+      .Default(CtorType());
+}
diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.h b/mlir/lib/Target/LLVMIR/DebugImporter.h
index 7d4a371..bcf628f 100644
--- a/mlir/lib/Target/LLVMIR/DebugImporter.h
+++ b/mlir/lib/Target/LLVMIR/DebugImporter.h
@@ -82,12 +82,28 @@ private:
   /// null attribute otherwise.
   StringAttr getStringAttrOrNull(llvm::MDString *stringNode);
 
+  /// Get the DistinctAttr used to represent `node` if one was already created
+  /// for it, or create a new one if not.
+  DistinctAttr getOrCreateDistinctID(llvm::DINode *node);
+
+  /// Get the `getRecSelf` constructor for the translated type of `node` if its
+  /// translated DITypeAttr supports recursion. Otherwise, returns nullptr.
+  function_ref<DIRecursiveTypeAttrInterface(DistinctAttr)>
+  getRecSelfConstructor(llvm::DINode *node);
+
   /// A mapping between LLVM debug metadata and the corresponding attribute.
   DenseMap<llvm::DINode *, DINodeAttr> nodeToAttr;
+  /// A mapping between distinct LLVM debug metadata nodes and the corresponding
+  /// distinct id attribute.
+  DenseMap<llvm::DINode *, DistinctAttr> nodeToDistinctAttr;
 
   /// A stack that stores the metadata nodes that are being traversed. The stack
   /// is used to detect cyclic dependencies during the metadata translation.
-  SetVector<llvm::DINode *> translationStack;
+  /// A node is pushed with a null value. If it is ever seen twice, it is given
+  /// a recursive id attribute, indicating that it is a recursive node.
+  llvm::MapVector<llvm::DINode *, DistinctAttr> translationStack;
+  /// All the unbound recursive self references in the translation stack.
+  SmallVector<DenseSet<DistinctAttr>> unboundRecursiveSelfRefs;
 
   MLIRContext *context;
   ModuleOp mlirModule;
diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
index 16918aa..eaf9373 100644
--- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
@@ -117,11 +117,7 @@ static DINodeT *getDistinctOrUnique(bool isDistinct, Ts &&...args) {
 }
 
 llvm::DICompositeType *
-DebugTranslation::translateImpl(DICompositeTypeAttr attr) {
-  SmallVector<llvm::Metadata *> elements;
-  for (auto member : attr.getElements())
-    elements.push_back(translate(member));
-
+DebugTranslation::translateImplGetPlaceholder(DICompositeTypeAttr attr) {
   // TODO: Use distinct attributes to model this, once they have landed.
   // Depending on the tag, composite types must be distinct.
   bool isDistinct = false;
@@ -133,6 +129,8 @@ DebugTranslation::translateImpl(DICompositeTypeAttr attr) {
     isDistinct = true;
   }
 
+  llvm::TempMDTuple placeholderElements =
+      llvm::MDNode::getTemporary(llvmCtx, std::nullopt);
   return getDistinctOrUnique<llvm::DICompositeType>(
       isDistinct, llvmCtx, attr.getTag(), getMDStringOrNull(attr.getName()),
       translate(attr.getFile()), attr.getLine(), translate(attr.getScope()),
@@ -140,8 +138,23 @@ DebugTranslation::translateImpl(DICompositeTypeAttr attr) {
       attr.getAlignInBits(),
       /*OffsetInBits=*/0,
       /*Flags=*/static_cast<llvm::DINode::DIFlags>(attr.getFlags()),
-      llvm::MDNode::get(llvmCtx, elements),
-      /*RuntimeLang=*/0, /*VTableHolder=*/nullptr);
+      /*Elements=*/placeholderElements.get(), /*RuntimeLang=*/0,
+      /*VTableHolder=*/nullptr);
+}
+
+void DebugTranslation::translateImplFillPlaceholder(
+    DICompositeTypeAttr attr, llvm::DICompositeType *placeholder) {
+  SmallVector<llvm::Metadata *> elements;
+  for (DINodeAttr member : attr.getElements())
+    elements.push_back(translate(member));
+  placeholder->replaceElements(llvm::MDNode::get(llvmCtx, elements));
+}
+
+llvm::DICompositeType *
+DebugTranslation::translateImpl(DICompositeTypeAttr attr) {
+  llvm::DICompositeType *placeholder = translateImplGetPlaceholder(attr);
+  translateImplFillPlaceholder(attr, placeholder);
+  return placeholder;
 }
 
 llvm::DIDerivedType *DebugTranslation::translateImpl(DIDerivedTypeAttr attr) {
@@ -200,22 +213,71 @@ DebugTranslation::translateImpl(DIGlobalVariableAttr attr) {
       attr.getIsDefined(), nullptr, nullptr, attr.getAlignInBits(), nullptr);
 }
 
+llvm::DIType *
+DebugTranslation::translateRecursive(DIRecursiveTypeAttrInterface attr) {
+  DistinctAttr recursiveId = attr.getRecId();
+  if (attr.isRecSelf()) {
+    auto *iter = recursiveTypeMap.find(recursiveId);
+    assert(iter != recursiveTypeMap.end() && "unbound DI recursive self type");
+    return iter->second;
+  }
+
+  auto setRecursivePlaceholder = [&](llvm::DIType *placeholder) {
+    auto [iter, inserted] =
+        recursiveTypeMap.try_emplace(recursiveId, placeholder);
+    assert(inserted && "illegal reuse of recursive id");
+  };
+
+  llvm::DIType *result =
+      TypeSwitch<DIRecursiveTypeAttrInterface, llvm::DIType *>(attr)
+          .Case<DICompositeTypeAttr>([&](auto attr) {
+            auto *placeholder = translateImplGetPlaceholder(attr);
+            setRecursivePlaceholder(placeholder);
+            translateImplFillPlaceholder(attr, placeholder);
+            return placeholder;
+          });
+
+  assert(recursiveTypeMap.back().first == recursiveId &&
+         "internal inconsistency: unexpected recursive translation stack");
+  recursiveTypeMap.pop_back();
+
+  return result;
+}
+
 llvm::DIScope *DebugTranslation::translateImpl(DIScopeAttr attr) {
   return cast<llvm::DIScope>(translate(DINodeAttr(attr)));
 }
 
 llvm::DISubprogram *DebugTranslation::translateImpl(DISubprogramAttr attr) {
+  if (auto iter = distinctAttrToNode.find(attr.getId());
+      iter != distinctAttrToNode.end())
+    return cast<llvm::DISubprogram>(iter->second);
+
+  llvm::DIScope *scope = translate(attr.getScope());
+  llvm::DIFile *file = translate(attr.getFile());
+  llvm::DIType *type = translate(attr.getType());
+  llvm::DICompileUnit *compileUnit = translate(attr.getCompileUnit());
+
+  // Check again after recursive calls in case this distinct node recurses back
+  // to itself.
+  if (auto iter = distinctAttrToNode.find(attr.getId());
+      iter != distinctAttrToNode.end())
+    return cast<llvm::DISubprogram>(iter->second);
+
   bool isDefinition = static_cast<bool>(attr.getSubprogramFlags() &
                                         LLVM::DISubprogramFlags::Definition);
-  return getDistinctOrUnique<llvm::DISubprogram>(
-      isDefinition, llvmCtx, translate(attr.getScope()),
-      getMDStringOrNull(attr.getName()),
-      getMDStringOrNull(attr.getLinkageName()), translate(attr.getFile()),
-      attr.getLine(), translate(attr.getType()), attr.getScopeLine(),
+  llvm::DISubprogram *node = getDistinctOrUnique<llvm::DISubprogram>(
+      isDefinition, llvmCtx, scope, getMDStringOrNull(attr.getName()),
+      getMDStringOrNull(attr.getLinkageName()), file, attr.getLine(), type,
+      attr.getScopeLine(),
       /*ContainingType=*/nullptr, /*VirtualIndex=*/0,
       /*ThisAdjustment=*/0, llvm::DINode::FlagZero,
       static_cast<llvm::DISubprogram::DISPFlags>(attr.getSubprogramFlags()),
-      translate(attr.getCompileUnit()));
+      compileUnit);
+
+  if (attr.getId())
+    distinctAttrToNode.try_emplace(attr.getId(), node);
+  return node;
 }
 
 llvm::DIModule *DebugTranslation::translateImpl(DIModuleAttr attr) {
@@ -268,15 +330,23 @@ llvm::DINode *DebugTranslation::translate(DINodeAttr attr) {
   if (llvm::DINode *node = attrToNode.lookup(attr))
     return node;
 
-  llvm::DINode *node =
-      TypeSwitch<DINodeAttr, llvm::DINode *>(attr)
-          .Case<DIBasicTypeAttr, DICompileUnitAttr, DICompositeTypeAttr,
-                DIDerivedTypeAttr, DIFileAttr, DIGlobalVariableAttr,
-                DILabelAttr, DILexicalBlockAttr, DILexicalBlockFileAttr,
-                DILocalVariableAttr, DIModuleAttr, DINamespaceAttr,
-                DINullTypeAttr, DISubprogramAttr, DISubrangeAttr,
-                DISubroutineTypeAttr>(
-              [&](auto attr) { return translateImpl(attr); });
+  llvm::DINode *node = nullptr;
+  // Recursive types go through a dedicated handler. All other types are
+  // dispatched directly to their specific handlers.
+  if (auto recTypeAttr = dyn_cast<DIRecursiveTypeAttrInterface>(attr))
+    if (recTypeAttr.getRecId())
+      node = translateRecursive(recTypeAttr);
+
+  if (!node)
+    node = TypeSwitch<DINodeAttr, llvm::DINode *>(attr)
+               .Case<DIBasicTypeAttr, DICompileUnitAttr, DICompositeTypeAttr,
+                     DIDerivedTypeAttr, DIFileAttr, DIGlobalVariableAttr,
+                     DILabelAttr, DILexicalBlockAttr, DILexicalBlockFileAttr,
+                     DILocalVariableAttr, DIModuleAttr, DINamespaceAttr,
+                     DINullTypeAttr, DISubprogramAttr, DISubrangeAttr,
+                     DISubroutineTypeAttr>(
+                   [&](auto attr) { return translateImpl(attr); });
+
   attrToNode.insert({attr, node});
   return node;
 }
diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.h b/mlir/lib/Target/LLVMIR/DebugTranslation.h
index 627c684..d2171fe 100644
--- a/mlir/lib/Target/LLVMIR/DebugTranslation.h
+++ b/mlir/lib/Target/LLVMIR/DebugTranslation.h
@@ -88,6 +88,24 @@ private:
   llvm::DISubroutineType *translateImpl(DISubroutineTypeAttr attr);
   llvm::DIType *translateImpl(DITypeAttr attr);
 
+  /// Attributes that support self recursion need to implement two methods and
+  /// hook into the `translateImpl` overload for `DIRecursiveTypeAttr`.
+  /// - `<llvm type> translateImplGetPlaceholder(<mlir type>)`:
+  ///   Translate the DI attr without translating any potentially recursive
+  ///   nested DI attrs.
+  /// - `void translateImplFillPlaceholder(<mlir type>, <llvm type>)`:
+  ///   Given the placeholder returned by `translateImplGetPlaceholder`, fill
+  ///   any holes by recursively translating nested DI attrs. This method must
+  ///   mutate the placeholder that is passed in, instead of creating a new one.
+  llvm::DIType *translateRecursive(DIRecursiveTypeAttrInterface attr);
+
+  /// Get a placeholder DICompositeType without recursing into the elements.
+  llvm::DICompositeType *translateImplGetPlaceholder(DICompositeTypeAttr attr);
+  /// Completes the DICompositeType `placeholder` by recursively translating the
+  /// elements.
+  void translateImplFillPlaceholder(DICompositeTypeAttr attr,
+                                    llvm::DICompositeType *placeholder);
+
   /// Constructs a string metadata node from the string attribute. Returns
   /// nullptr if `stringAttr` is null or contains and empty string.
   llvm::MDString *getMDStringOrNull(StringAttr stringAttr);
@@ -102,6 +120,15 @@ private:
   /// metadata.
   DenseMap<Attribute, llvm::DINode *> attrToNode;
 
+  /// A mapping between recursive ID and the translated DIType.
+  /// DIType.
+  llvm::MapVector<DistinctAttr, llvm::DIType *> recursiveTypeMap;
+
+  /// A mapping between a distinct ID and the translated LLVM metadata node.
+  /// This helps identify attrs that should translate into the same LLVM debug
+  /// node.
+  DenseMap<DistinctAttr, llvm::DINode *> distinctAttrToNode;
+
   /// A mapping between filename and llvm debug file.
   /// TODO: Change this to DenseMap<Identifier, ...> when we can
   /// access the Identifier filename in FileLineColLoc.
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp
index df675cf..64dea35 100644
--- a/mlir/lib/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Tools/lsp-server-support/Transport.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/Protocol.h"
 #include "llvm/ADT/SmallString.h"
@@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
     StringRef lineRef = line.str().trim();
     if (lineRef.starts_with("//")) {
       // Found a delimiter for the message.
-      if (lineRef == "// -----")
+      if (lineRef == kDefaultSplitMarker)
         break;
       continue;
     }
diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
index de657a3..ed75b4a 100644
--- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
+++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
@@ -15,6 +15,7 @@
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Parser/Parser.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
   context.allowUnregisteredDialects();
 
   // Split the file into separate MLIR documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
       context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));
 
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index b625571..51504ad 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -31,6 +31,7 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Support/FileUtilities.h"
+#include "mlir/Support/LogicalResult.h"
 #include "mlir/Support/Timing.h"
 #include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/ParseUtilities.h"
@@ -127,11 +128,21 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
         cl::desc("Print the list of registered dialects and exit"),
         cl::location(showDialectsFlag), cl::init(false));
 
-    static cl::opt<bool, /*ExternalStorage=*/true> splitInputFile(
-        "split-input-file",
-        cl::desc("Split the input file into pieces and process each "
-                 "chunk independently"),
-        cl::location(splitInputFileFlag), cl::init(false));
+    static cl::opt<std::string, /*ExternalStorage=*/true> splitInputFile(
+        "split-input-file", llvm::cl::ValueOptional,
+        cl::callback([&](const std::string &str) {
+          // Implicit value: use default marker if flag was used without value.
+          if (str.empty())
+            splitInputFile.setValue(kDefaultSplitMarker);
+        }),
+        cl::desc("Split the input file into chunks using the given or "
+                 "default marker and process each chunk independently"),
+        cl::location(splitInputFileFlag), cl::init(""));
+
+    static cl::opt<std::string, /*ExternalStorage=*/true> outputSplitMarker(
+        "output-split-marker",
+        cl::desc("Split marker to use for merging the ouput"),
+        cl::location(outputSplitMarkerFlag), cl::init(kDefaultSplitMarker));
 
     static cl::opt<bool, /*ExternalStorage=*/true> verifyDiagnostics(
         "verify-diagnostics",
@@ -503,15 +514,19 @@ mlir::registerAndParseCLIOptions(int argc, char **argv,
   return std::make_pair(inputFilename.getValue(), outputFilename.getValue());
 }
 
+static LogicalResult printRegisteredDialects(DialectRegistry &registry) {
+  llvm::outs() << "Available Dialects: ";
+  interleave(registry.getDialectNames(), llvm::outs(), ",");
+  llvm::outs() << "\n";
+  return success();
+}
+
 LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                                 std::unique_ptr<llvm::MemoryBuffer> buffer,
                                 DialectRegistry &registry,
                                 const MlirOptMainConfig &config) {
-  if (config.shouldShowDialects()) {
-    llvm::outs() << "Available Dialects: ";
-    interleave(registry.getDialectNames(), llvm::outs(), ",");
-    llvm::outs() << "\n";
-  }
+  if (config.shouldShowDialects())
+    return printRegisteredDialects(registry);
 
   // The split-input-file mode is a very specific mode that slices the file
   // up into small pieces and checks each independently.
@@ -533,8 +548,8 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                          threadPool);
   };
   return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
-                               config.shouldSplitInputFile(),
-                               /*insertMarkerInOutput=*/true);
+                               config.inputSplitMarker(),
+                               config.outputSplitMarker());
 }
 
 LogicalResult mlir::MlirOptMain(int argc, char **argv,
@@ -546,6 +561,9 @@ LogicalResult mlir::MlirOptMain(int argc, char **argv,
 
   MlirOptMainConfig config = MlirOptMainConfig::createFromCLOptions();
 
+  if (config.shouldShowDialects())
+    return printRegisteredDialects(registry);
+
   // When reading from stdin and the input is a tty, it is often a user mistake
   // and the process "appears to be stuck". Print a message to let the user know
   // about it!
diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
index a5c6c2b..d282ee8 100644
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@@ -10,6 +10,7 @@
 
 #include "Protocol.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/PDLL/AST/Context.h"
 #include "mlir/Tools/PDLL/AST/Nodes.h"
 #include "mlir/Tools/PDLL/AST/Types.h"
@@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
         [&](PDLTextFileChunk &chunk) {
           chunk.document.getPDLLViewOutput(outputOS, kind);
         },
-        [&] { outputOS << "\n// -----\n\n"; });
+        [&] { outputOS << "\n"
+                       << kDefaultSplitMarker << "\n\n"; });
   }
   return result;
 }
@@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
   chunks.clear();
 
   // Split the file into separate PDL documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
       /*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
       diagnostics));
diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
index 92adb8d6a..1aaf8ad 100644
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@@ -62,11 +62,16 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
       llvm::cl::desc("Allow operation with no registered dialects (discouraged: testing only!)"),
       llvm::cl::init(false));
 
-  static llvm::cl::opt<bool> splitInputFile(
-      "split-input-file",
-      llvm::cl::desc("Split the input file into pieces and "
-                     "process each chunk independently"),
-      llvm::cl::init(false));
+  static llvm::cl::opt<std::string> inputSplitMarker(
+      "split-input-file", llvm::cl::ValueOptional,
+      llvm::cl::callback([&](const std::string &str) {
+        // Implicit value: use default marker if flag was used without value.
+        if (str.empty())
+          inputSplitMarker.setValue(kDefaultSplitMarker);
+      }),
+      llvm::cl::desc("Split the input file into chunks using the given or "
+                     "default marker and process each chunk independently"),
+      llvm::cl::init(""));
 
   static llvm::cl::opt<bool> verifyDiagnostics(
       "verify-diagnostics",
@@ -80,6 +85,11 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
                      "(discouraged: testing only!)"),
       llvm::cl::init(false));
 
+  static llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for merging the ouput"),
+      llvm::cl::init(""));
+
   llvm::InitLLVM y(argc, argv);
 
   // Add flags for all the registered translations.
@@ -176,7 +186,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
   };
 
   if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
-                                   output->os(), splitInputFile)))
+                                   output->os(), inputSplitMarker,
+                                   outputSplitMarker)))
     return failure();
 
   output->keep();
diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c
index 2fd98b2..48b887e 100644
--- a/mlir/test/CAPI/llvm.c
+++ b/mlir/test/CAPI/llvm.c
@@ -301,7 +301,7 @@ static void testDebugInfoAttributes(MlirContext ctx) {
 
   // CHECK: #llvm.di_composite_type<{{.*}}>
   mlirAttributeDump(mlirLLVMDICompositeTypeAttrGet(
-      ctx, 0, foo, file, 1, compile_unit, di_type, 0, 64, 8, 1, &di_type));
+      ctx, 0, id, foo, file, 1, compile_unit, di_type, 0, 64, 8, 1, &di_type));
 
   MlirAttribute subroutine_type =
       mlirLLVMDISubroutineTypeAttrGet(ctx, 0x0, 1, &di_type);
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index c80ba48..5918ff2 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -708,11 +708,60 @@ func.func @complex_tanh(%arg: complex<f32>) -> complex<f32> {
 // -----
 
 // CHECK-LABEL: func @complex_sqrt
+// CHECK-SAME: %[[ARG:.*]]: complex<f32>
 func.func @complex_sqrt(%arg: complex<f32>) -> complex<f32> {
   %sqrt = complex.sqrt %arg : complex<f32>
   return %sqrt : complex<f32>
 }
 
+// CHECK: %[[CST:.*]]  = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAR0:.*]] = complex.re %[[ARG]] : complex<f32>
+// CHECK: %[[VAR1:.*]] = complex.im %[[ARG]] : complex<f32>
+// CHECK: %[[VAR2:.*]] = math.absf %[[VAR0]] : f32
+// CHECK: %[[CST0:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[CST1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK: %[[VAR3:.*]] = complex.re %[[ARG]] : complex<f32>
+// CHECK: %[[VAR4:.*]] = complex.im %[[ARG]] : complex<f32>
+// CHECK: %[[VAR5:.*]] = arith.cmpf oeq, %[[VAR3]], %[[CST0]] : f32
+// CHECK: %[[VAR6:.*]] = arith.cmpf oeq, %[[VAR4]], %[[CST0]] : f32
+// CHECK: %[[VAR7:.*]] = arith.divf %[[VAR4]], %[[VAR3]] : f32
+// CHECK: %[[VAR8:.*]] = arith.mulf %[[VAR7]], %[[VAR7]] : f32
+// CHECK: %[[VAR9:.*]] = arith.addf %[[VAR8]], %[[CST1]] : f32
+// CHECK: %[[VAR10:.*]] = math.sqrt %[[VAR9]] : f32
+// CHECK: %[[VAR11:.*]] = math.absf %[[VAR3]] : f32
+// CHECK: %[[VAR12:.*]] = arith.mulf %[[VAR10]], %[[VAR11]] : f32
+// CHECK: %[[VAR13:.*]] = arith.divf %[[VAR3]], %[[VAR4]] : f32
+// CHECK: %[[VAR14:.*]] = arith.mulf %[[VAR13]], %[[VAR13]] : f32
+// CHECK: %[[VAR15:.*]] = arith.addf %[[VAR14]], %[[CST1]] : f32
+// CHECK: %[[VAR16:.*]] = math.sqrt %[[VAR15]] : f32
+// CHECK: %[[VAR17:.*]] = math.absf %[[VAR4]] : f32
+// CHECK: %[[VAR18:.*]] = arith.mulf %[[VAR16]], %[[VAR17]] : f32
+// CHECK: %[[VAR19:.*]] = arith.cmpf ogt, %[[VAR3]], %[[VAR4]] : f32
+// CHECK: %[[VAR20:.*]] = arith.select %[[VAR19]], %[[VAR12]], %[[VAR18]] : f32
+// CHECK: %[[VAR21:.*]] = arith.select %[[VAR6]], %[[VAR11]], %[[VAR20]] : f32
+// CHECK: %[[VAR22:.*]] = arith.select %[[VAR5]], %[[VAR17]], %[[VAR21]] : f32
+// CHECK: %[[VAR23:.*]] = arith.addf %[[VAR2]], %[[VAR22]] : f32
+// CHECK: %[[CST2:.*]]  = arith.constant 5.000000e-01 : f32
+// CHECK: %[[VAR24:.*]] = arith.mulf %[[VAR23]], %[[CST2]] : f32
+// CHECK: %[[VAR25:.*]] = math.sqrt %[[VAR24]] : f32
+// CHECK: %[[VAR26:.*]] = arith.cmpf olt, %[[VAR0]], %cst : f32
+// CHECK: %[[VAR27:.*]] = arith.cmpf olt, %[[VAR1]], %cst : f32
+// CHECK: %[[VAR28:.*]] = arith.addf %[[VAR25]], %[[VAR25]] : f32
+// CHECK: %[[VAR29:.*]] = arith.divf %[[VAR1]], %[[VAR28]] : f32
+// CHECK: %[[VAR30:.*]] = arith.negf %[[VAR25]] : f32
+// CHECK: %[[VAR31:.*]] = arith.select %[[VAR27]], %[[VAR30]], %[[VAR25]] : f32
+// CHECK: %[[VAR32:.*]] = arith.select %[[VAR26]], %[[VAR31]], %[[VAR29]] : f32
+// CHECK: %[[VAR33:.*]] = arith.addf %[[VAR32]], %[[VAR32]] : f32
+// CHECK: %[[VAR34:.*]] = arith.divf %[[VAR1]], %[[VAR33]] : f32
+// CHECK: %[[VAR35:.*]] = arith.select %[[VAR26]], %[[VAR34]], %[[VAR25]] : f32
+// CHECK: %[[VAR36:.*]] = arith.cmpf oeq, %[[VAR0]], %cst : f32
+// CHECK: %[[VAR37:.*]] = arith.cmpf oeq, %[[VAR1]], %cst : f32
+// CHECK: %[[VAR38:.*]] = arith.andi %[[VAR36]], %[[VAR37]] : i1
+// CHECK: %[[VAR39:.*]] = arith.select %[[VAR38]], %cst, %[[VAR35]] : f32
+// CHECK: %[[VAR40:.*]] = arith.select %[[VAR38]], %cst, %[[VAR32]] : f32
+// CHECK: %[[VAR41:.*]] = complex.create %[[VAR39]], %[[VAR40]] : complex<f32>
+// CHECK: return %[[VAR41]] : complex<f32>
+
 // -----
 
 // CHECK-LABEL: func @complex_conj
@@ -1254,42 +1303,42 @@ func.func @complex_atan2_with_fmf(%lhs: complex<f32>,
 // CHECK: %[[CST_6:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[VAR187:.*]] = complex.re %[[VAR186]] : complex<f32>
 // CHECK: %[[VAR188:.*]] = complex.im %[[VAR186]] : complex<f32>
-// CHECK: %[[VAR189:.*]] = math.absf %[[VAR187]] : f32
+// CHECK: %[[VAR189:.*]] = math.absf %[[VAR187]] fastmath<nnan,contract> : f32
 // CHECK: %[[CST_7:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[CST_8:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK: %[[VAR190:.*]] = complex.re %[[VAR186]] : complex<f32>
 // CHECK: %[[VAR191:.*]] = complex.im %[[VAR186]] : complex<f32>
 // CHECK: %[[VAR192:.*]] = arith.cmpf oeq, %[[VAR190]], %[[CST_7]] : f32
 // CHECK: %[[VAR193:.*]] = arith.cmpf oeq, %[[VAR191]], %[[CST_7]] : f32
-// CHECK: %[[VAR194:.*]] = arith.divf %[[VAR191]], %[[VAR190]] : f32
-// CHECK: %[[VAR195:.*]] = arith.mulf %[[VAR194]], %[[VAR194]] : f32
-// CHECK: %[[VAR196:.*]] = arith.addf %[[VAR195]], %[[CST_8]] : f32
-// CHECK: %[[VAR197:.*]] = math.sqrt %[[VAR196]] : f32
-// CHECK: %[[VAR198:.*]] = math.absf %[[VAR190]] : f32
-// CHECK: %[[VAR199:.*]] = arith.mulf %[[VAR197]], %[[VAR198]] : f32
-// CHECK: %[[VAR200:.*]] = arith.divf %[[VAR190]], %[[VAR191]] : f32
-// CHECK: %[[VAR201:.*]] = arith.mulf %[[VAR200]], %[[VAR200]] : f32
-// CHECK: %[[VAR202:.*]] = arith.addf %[[VAR201]], %[[CST_8]] : f32
-// CHECK: %[[VAR203:.*]] = math.sqrt %[[VAR202]] : f32
-// CHECK: %[[VAR204:.*]] = math.absf %[[VAR191]] : f32
-// CHECK: %[[VAR205:.*]] = arith.mulf %[[VAR203]], %[[VAR204]] : f32
+// CHECK: %[[VAR194:.*]] = arith.divf %[[VAR191]], %[[VAR190]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR195:.*]] = arith.mulf %[[VAR194]], %[[VAR194]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR196:.*]] = arith.addf %[[VAR195]], %[[CST_8]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR197:.*]] = math.sqrt %[[VAR196]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR198:.*]] = math.absf %[[VAR190]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR199:.*]] = arith.mulf %[[VAR197]], %[[VAR198]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR200:.*]] = arith.divf %[[VAR190]], %[[VAR191]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR201:.*]] = arith.mulf %[[VAR200]], %[[VAR200]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR202:.*]] = arith.addf %[[VAR201]], %[[CST_8]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR203:.*]] = math.sqrt %[[VAR202]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR204:.*]] = math.absf %[[VAR191]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR205:.*]] = arith.mulf %[[VAR203]], %[[VAR204]] fastmath<nnan,contract> : f32
 // CHECK: %[[VAR206:.*]] = arith.cmpf ogt, %[[VAR190]], %[[VAR191]] : f32
 // CHECK: %[[VAR207:.*]] = arith.select %[[VAR206]], %[[VAR199]], %[[VAR205]] : f32
 // CHECK: %[[VAR208:.*]] = arith.select %[[VAR193]], %[[VAR198]], %[[VAR207]] : f32
 // CHECK: %[[VAR209:.*]] = arith.select %[[VAR192]], %[[VAR204]], %[[VAR208]] : f32
-// CHECK: %[[VAR210:.*]] = arith.addf %[[VAR189]], %[[VAR209]] : f32
+// CHECK: %[[VAR210:.*]] = arith.addf %[[VAR189]], %[[VAR209]] fastmath<nnan,contract> : f32
 // CHECK: %[[CST_9:.*]] = arith.constant 5.000000e-01 : f32
-// CHECK: %[[VAR211:.*]] = arith.mulf %[[VAR210]], %[[CST_9]] : f32
-// CHECK: %[[VAR212:.*]] = math.sqrt %[[VAR211]] : f32
+// CHECK: %[[VAR211:.*]] = arith.mulf %[[VAR210]], %[[CST_9]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR212:.*]] = math.sqrt %[[VAR211]] fastmath<nnan,contract> : f32
 // CHECK: %[[VAR213:.*]] = arith.cmpf olt, %[[VAR187]], %[[CST_6]] : f32
 // CHECK: %[[VAR214:.*]] = arith.cmpf olt, %[[VAR188]], %[[CST_6]] : f32
-// CHECK: %[[VAR215:.*]] = arith.addf %[[VAR212]], %[[VAR212]] : f32
-// CHECK: %[[VAR216:.*]] = arith.divf %[[VAR188]], %[[VAR215]] : f32
+// CHECK: %[[VAR215:.*]] = arith.addf %[[VAR212]], %[[VAR212]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR216:.*]] = arith.divf %[[VAR188]], %[[VAR215]] fastmath<nnan,contract> : f32
 // CHECK: %[[VAR217:.*]] = arith.negf %[[VAR212]] : f32
 // CHECK: %[[VAR218:.*]] = arith.select %[[VAR214]], %[[VAR217]], %[[VAR212]] : f32
 // CHECK: %[[VAR219:.*]] = arith.select %[[VAR213]], %[[VAR218]], %[[VAR216]] : f32
-// CHECK: %[[VAR220:.*]] = arith.addf %[[VAR219]], %[[VAR219]] : f32
-// CHECK: %[[VAR221:.*]] = arith.divf %[[VAR188]], %[[VAR220]] : f32
+// CHECK: %[[VAR220:.*]] = arith.addf %[[VAR219]], %[[VAR219]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR221:.*]] = arith.divf %[[VAR188]], %[[VAR220]] fastmath<nnan,contract> : f32
 // CHECK: %[[VAR222:.*]] = arith.select %[[VAR213]], %[[VAR221]], %[[VAR212]] : f32
 // CHECK: %[[VAR223:.*]] = arith.cmpf oeq, %[[VAR187]], %[[CST_6]] : f32
 // CHECK: %[[VAR224:.*]] = arith.cmpf oeq, %[[VAR188]], %[[CST_6]] : f32
@@ -1728,3 +1777,60 @@ func.func @complex_div_with_fmf(%lhs: complex<f32>, %rhs: complex<f32>) -> compl
 // CHECK: %[[RESULT_IMAG_WITH_SPECIAL_CASES:.*]] = arith.select %[[RESULT_IS_NAN]], %[[RESULT_IMAG_SPECIAL_CASE_1]], %[[RESULT_IMAG]] : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL_WITH_SPECIAL_CASES]], %[[RESULT_IMAG_WITH_SPECIAL_CASES]] : complex<f32>
 // CHECK: return %[[RESULT]] : complex<f32>
+
+// -----
+
+// CHECK-LABEL: func @complex_sqrt_with_fmf
+// CHECK-SAME: %[[ARG:.*]]: complex<f32>
+func.func @complex_sqrt_with_fmf(%arg: complex<f32>) -> complex<f32> {
+  %sqrt = complex.sqrt %arg fastmath<nnan,contract> : complex<f32>
+  return %sqrt : complex<f32>
+}
+
+// CHECK: %[[CST:.*]]  = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAR0:.*]] = complex.re %[[ARG]] : complex<f32>
+// CHECK: %[[VAR1:.*]] = complex.im %[[ARG]] : complex<f32>
+// CHECK: %[[VAR2:.*]] = math.absf %[[VAR0]] fastmath<nnan,contract> : f32
+// CHECK: %[[CST0:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[CST1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK: %[[VAR3:.*]] = complex.re %[[ARG]] : complex<f32>
+// CHECK: %[[VAR4:.*]] = complex.im %[[ARG]] : complex<f32>
+// CHECK: %[[VAR5:.*]] = arith.cmpf oeq, %[[VAR3]], %[[CST0]] : f32
+// CHECK: %[[VAR6:.*]] = arith.cmpf oeq, %[[VAR4]], %[[CST0]] : f32
+// CHECK: %[[VAR7:.*]] = arith.divf %[[VAR4]], %[[VAR3]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR8:.*]] = arith.mulf %[[VAR7]], %[[VAR7]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR9:.*]] = arith.addf %[[VAR8]], %[[CST1]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR10:.*]] = math.sqrt %[[VAR9]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR11:.*]] = math.absf %[[VAR3]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR12:.*]] = arith.mulf %[[VAR10]], %[[VAR11]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR13:.*]] = arith.divf %[[VAR3]], %[[VAR4]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR14:.*]] = arith.mulf %[[VAR13]], %[[VAR13]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR15:.*]] = arith.addf %[[VAR14]], %[[CST1]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR16:.*]] = math.sqrt %[[VAR15]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR17:.*]] = math.absf %[[VAR4]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR18:.*]] = arith.mulf %[[VAR16]], %[[VAR17]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR19:.*]] = arith.cmpf ogt, %[[VAR3]], %[[VAR4]] : f32
+// CHECK: %[[VAR20:.*]] = arith.select %[[VAR19]], %[[VAR12]], %[[VAR18]] : f32
+// CHECK: %[[VAR21:.*]] = arith.select %[[VAR6]], %[[VAR11]], %[[VAR20]] : f32
+// CHECK: %[[VAR22:.*]] = arith.select %[[VAR5]], %[[VAR17]], %[[VAR21]] : f32
+// CHECK: %[[VAR23:.*]] = arith.addf %[[VAR2]], %[[VAR22]] fastmath<nnan,contract> : f32
+// CHECK: %[[CST2:.*]]  = arith.constant 5.000000e-01 : f32
+// CHECK: %[[VAR24:.*]] = arith.mulf %[[VAR23]], %[[CST2]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR25:.*]] = math.sqrt %[[VAR24]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR26:.*]] = arith.cmpf olt, %[[VAR0]], %cst : f32
+// CHECK: %[[VAR27:.*]] = arith.cmpf olt, %[[VAR1]], %cst : f32
+// CHECK: %[[VAR28:.*]] = arith.addf %[[VAR25]], %[[VAR25]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR29:.*]] = arith.divf %[[VAR1]], %[[VAR28]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR30:.*]] = arith.negf %[[VAR25]] : f32
+// CHECK: %[[VAR31:.*]] = arith.select %[[VAR27]], %[[VAR30]], %[[VAR25]] : f32
+// CHECK: %[[VAR32:.*]] = arith.select %[[VAR26]], %[[VAR31]], %[[VAR29]] : f32
+// CHECK: %[[VAR33:.*]] = arith.addf %[[VAR32]], %[[VAR32]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR34:.*]] = arith.divf %[[VAR1]], %[[VAR33]] fastmath<nnan,contract> : f32
+// CHECK: %[[VAR35:.*]] = arith.select %[[VAR26]], %[[VAR34]], %[[VAR25]] : f32
+// CHECK: %[[VAR36:.*]] = arith.cmpf oeq, %[[VAR0]], %cst : f32
+// CHECK: %[[VAR37:.*]] = arith.cmpf oeq, %[[VAR1]], %cst : f32
+// CHECK: %[[VAR38:.*]] = arith.andi %[[VAR36]], %[[VAR37]] : i1
+// CHECK: %[[VAR39:.*]] = arith.select %[[VAR38]], %cst, %[[VAR35]] : f32
+// CHECK: %[[VAR40:.*]] = arith.select %[[VAR38]], %cst, %[[VAR32]] : f32
+// CHECK: %[[VAR41:.*]] = complex.create %[[VAR39]], %[[VAR40]] : complex<f32>
+// CHECK: return %[[VAR41]] : complex<f32>
diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir
index 58b3a11..6294c85 100644
--- a/mlir/test/Dialect/EmitC/invalid_ops.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir
@@ -235,7 +235,7 @@ func.func @test_misplaced_yield() {
 // -----
 
 func.func @test_assign_to_non_variable(%arg1: f32, %arg2: f32) {
-  // expected-error @+1 {{'emitc.assign' op requires first operand (<block argument> of type 'f32' at index: 1) to be a Variable}}
+  // expected-error @+1 {{'emitc.assign' op requires first operand (<block argument> of type 'f32' at index: 1) to be a Variable or subscript}}
   emitc.assign %arg1 : f32 to %arg2 : f32
   return
 }
@@ -387,3 +387,11 @@ func.func @logical_or_resulterror(%arg0: i32, %arg1: i32) {
   %0 = "emitc.logical_or"(%arg0, %arg1) : (i32, i32) -> i32
   return
 }
+
+// -----
+
+func.func @test_subscript_indices_mismatch(%arg0: !emitc.array<4x8xf32>, %arg2: index) {
+  // expected-error @+1 {{'emitc.subscript' op requires number of indices (1) to match the rank of the array type (2)}}
+  %0 = emitc.subscript %arg0[%arg2] : <4x8xf32>, index
+  return
+}
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index a1a5239..9127eac 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -19,10 +19,49 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-func.func @depthwise_conv1d_nwc_wc_dyn_ch_dim(%input: memref<3x5x?xf32>, %filter: memref<2x?xf32>, %output: memref<3x2x?xf32>) {
+// Masked vectorisation of 1D depthwise CW convs is not yet supported
+
+func.func @depthwise_conv1d_ncw_cw(%input: memref<3x?x4xf32>, %filter: memref<?x1xf32>, %output: memref<3x?x4xf32>) {
+  // expected-error @+1 {{Attempted to vectorize, but failed}}
+  linalg.depthwise_conv_1d_ncw_cw
+    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
+    ins(%input, %filter : memref<3x?x4xf32>, memref<?x1xf32>)
+    outs(%output : memref<3x?x4xf32>)
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.depthwise_conv_1d_ncw_cw"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [3, 4, 5, 1] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+func.func @depthwise_conv1d_nwc_wc_dyn_w_dim(%input: memref<3x?x4xf32>, %filter: memref<?x4xf32>, %output: memref<3x?x4xf32>) {
   // expected-error @+1 {{Attempted to vectorize, but failed}}
   linalg.depthwise_conv_1d_nwc_wc
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
+    ins(%input, %filter : memref<3x?x4xf32>, memref<?x4xf32>)
+    outs(%output : memref<3x?x4xf32>)
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.depthwise_conv_1d_nwc_wc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [3, 2, 4, 2] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+func.func @depthwise_conv1d_nwc_wc_dyn_ch_dim(%input: memref<3x5x?xf32>, %filter: memref<2x?xf32>, %output: memref<3x2x?xf32>) {
+  // expected-error @+1 {{Attempted to vectorize, but failed}}
+  linalg.depthwise_conv_1d_nwc_wc
     ins(%input, %filter : memref<3x5x?xf32>, memref<2x?xf32>)
     outs(%output : memref<3x2x?xf32>)
   return
@@ -41,7 +80,6 @@ module attributes {transform.with_named_sequence} {
 func.func @depthwise_conv1d_nwc_wc_dyn_w_dim(%input: memref<3x?x3xf32>, %filter: memref<2x3xf32>, %output: memref<3x?x3xf32>) {
   // expected-error @+1 {{Attempted to vectorize, but failed}}
   linalg.depthwise_conv_1d_nwc_wc
-    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<3x?x3xf32>, memref<2x3xf32>)
     outs(%output : memref<3x?x3xf32>)
   return
diff --git a/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir b/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir
new file mode 100644
index 0000000..84b556d
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir
@@ -0,0 +1,185 @@
+// RUN: mlir-opt -split-input-file -transform-interpreter -cse %s | FileCheck %s
+
+func.func @depthwise_conv1d_nwc_wc_1x8x3xi8_tensor(%input: tensor<1x8x?xi8>,
+                                                   %filter: tensor<1x?xi8>,
+                                                   %output: tensor<1x8x?xi8>) -> (tensor<1x8x?xi8>) {
+  %res = linalg.depthwise_conv_1d_nwc_wc
+    {dilations = dense<1> : vector<1xi64>,
+    strides = dense<1> : vector<1xi64>}
+    ins(%input, %filter : tensor<1x8x?xi8>, tensor<1x?xi8>)
+    outs(%output : tensor<1x8x?xi8>) -> tensor<1x8x?xi8>
+  return %res : tensor<1x8x?xi8>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.depthwise_conv_1d_nwc_wc"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 8, 4, 1] : !transform.any_op
+    transform.yield
+  }
+}
+
+// CHECK-LABEL:   func.func @depthwise_conv1d_nwc_wc_1x8x3xi8_tensor(
+// CHECK-SAME:      %[[INPUT:.*]]: tensor<1x8x?xi8>,
+// CHECK-SAME:      %[[FILTER:.*]]: tensor<1x?xi8>,
+// CHECK-SAME:      %[[OUTPUT:.*]]: tensor<1x8x?xi8>) -> tensor<1x8x?xi8> {
+
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[PAD:.*]] = arith.constant 0 : i8
+
+/// Create a mask for the input tensor
+// CHECK:           %[[C2:.*]] = arith.constant 2 : index
+// CHECK:           %[[CH_DIM_IN:.*]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<1x8x?xi8>
+// CHECK:           %[[C8:.*]] = arith.constant 8 : index
+// CHECK:           %[[MASK_IN:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_IN]] : vector<1x8x4xi1>
+/// Read the input tensor
+// CHECK:           %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] : tensor<1x8x?xi8>, vector<1x8x4xi8> } : vector<1x8x4xi1> -> vector<1x8x4xi8>
+
+/// Create a mask for the filter tensor
+// CHECK:           %[[CH_DIM_FLT:.*]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<1x?xi8>
+// CHECK:           %[[MASK_FLT:.*]] = vector.create_mask %[[C1]], %[[CH_DIM_FLT]] : vector<1x4xi1>
+/// Read the filter tensor
+// CHECK:           %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] : tensor<1x?xi8>, vector<1x4xi8> } : vector<1x4xi1> -> vector<1x4xi8>
+
+/// Create a mask for the output tensor
+// CHECK:           %[[CH_DIM_OUT:.*]] = tensor.dim %[[OUTPUT]], %[[C2]] : tensor<1x8x?xi8>
+// CHECK:           %[[MASK_OUT:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_OUT]] : vector<1x8x4xi1>
+// CHECK:           %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] : tensor<1x8x?xi8>, vector<1x8x4xi8> } : vector<1x8x4xi1> -> vector<1x8x4xi8>
+
+/// Convolution
+// CHECK:           %[[IN_1:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 0, 0], sizes = [1, 8, 4], strides = [1, 1, 1]} : vector<1x8x4xi8> to vector<1x8x4xi8>
+// CHECK:           %[[FLT_1:.*]] = vector.extract %[[VEC_FLT]][0] : vector<4xi8> from vector<1x4xi8>
+// CHECK:           %[[OUT_1:.*]] = vector.extract_strided_slice %[[VEC_OUT]] {offsets = [0, 0, 0], sizes = [1, 8, 4], strides = [1, 1, 1]} : vector<1x8x4xi8> to vector<1x8x4xi8>
+// CHECK:           %[[FLT_1_B:.*]] = vector.broadcast %[[FLT_1]] : vector<4xi8> to vector<1x8x4xi8>
+// CHECK:           %[[MULI:.*]] = arith.muli %[[IN_1]], %[[FLT_1_B]] : vector<1x8x4xi8>
+// CHECK:           %[[ADDI:.*]] = arith.addi %[[MULI]], %[[OUT_1]] : vector<1x8x4xi8>
+// CHECK:           %[[OUT_INS:.*]] = vector.insert_strided_slice %[[ADDI]], %[[VEC_OUT]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<1x8x4xi8> into vector<1x8x4xi8>
+// CHECK:           %[[OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] : vector<1x8x4xi8>, tensor<1x8x?xi8> } : vector<1x8x4xi1> -> tensor<1x8x?xi8>
+// CHECK:           return %[[OUT]] : tensor<1x8x?xi8>
+
+// -----
+
+func.func @depthwise_conv1d_nwc_wc_1x8x3xi8_tensor_scalable(
+      %input: tensor<1x8x?xi8>,
+      %filter: tensor<1x?xi8>,
+      %output: tensor<1x8x?xi8>) -> (tensor<1x8x?xi8>) {
+  %res = linalg.depthwise_conv_1d_nwc_wc
+    {dilations = dense<1> : vector<1xi64>,
+    strides = dense<1> : vector<1xi64>}
+    ins(%input, %filter : tensor<1x8x?xi8>, tensor<1x?xi8>)
+    outs(%output : tensor<1x8x?xi8>) -> tensor<1x8x?xi8>
+  return %res : tensor<1x8x?xi8>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.depthwise_conv_1d_nwc_wc"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 8, [4], 1] : !transform.any_op
+    transform.yield
+  }
+}
+
+// CHECK-LABEL:   func.func @depthwise_conv1d_nwc_wc_1x8x3xi8_tensor_scalable(
+// CHECK-SAME:      %[[INPUT:.*]]: tensor<1x8x?xi8>,
+// CHECK-SAME:      %[[FILTER:.*]]: tensor<1x?xi8>,
+// CHECK-SAME:      %[[OUTPUT:.*]]: tensor<1x8x?xi8>) -> tensor<1x8x?xi8> {
+
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[PAD:.*]] = arith.constant 0 : i8
+
+/// Create a mask for the input tensor
+// CHECK:           %[[C2:.*]] = arith.constant 2 : index
+// CHECK:           %[[CH_DIM_IN:.*]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<1x8x?xi8>
+// CHECK:           %[[C8:.*]] = arith.constant 8 : index
+// CHECK:           %[[MASK_IN:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_IN]] : vector<1x8x[4]xi1>
+/// Read the input tensor
+// CHECK:           %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] : tensor<1x8x?xi8>, vector<1x8x[4]xi8> } : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>
+
+/// Create a mask for the filter tensor
+// CHECK:           %[[CH_DIM_FLT:.*]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<1x?xi8>
+// CHECK:           %[[MASK_FLT:.*]] = vector.create_mask %[[C1]], %[[CH_DIM_FLT]] : vector<1x[4]xi1>
+/// Read the filter tensor
+// CHECK:           %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] : tensor<1x?xi8>, vector<1x[4]xi8> } : vector<1x[4]xi1> -> vector<1x[4]xi8>
+
+/// Create a mask for the output tensor
+// CHECK:           %[[CH_DIM_OUT:.*]] = tensor.dim %[[OUTPUT]], %[[C2]] : tensor<1x8x?xi8>
+// CHECK:           %[[MASK_OUT:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_OUT]] : vector<1x8x[4]xi1>
+/// Read the output tensor
+// CHECK:           %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] : tensor<1x8x?xi8>, vector<1x8x[4]xi8> } : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>
+
+/// Convolution
+// CHECK:           %[[IN_1:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 0, 0], sizes = [1, 8, 4], strides = [1, 1, 1]} : vector<1x8x[4]xi8> to vector<1x8x[4]xi8>
+// CHECK:           %[[FLT_1:.*]] = vector.extract %[[VEC_FLT]][0] : vector<[4]xi8> from vector<1x[4]xi8>
+// CHECK:           %[[OUT_1:.*]] = vector.extract_strided_slice %[[VEC_OUT]] {offsets = [0, 0, 0], sizes = [1, 8, 4], strides = [1, 1, 1]} : vector<1x8x[4]xi8> to vector<1x8x[4]xi8>
+// CHECK:           %[[FLT_1_B:.*]] = vector.broadcast %[[FLT_1]] : vector<[4]xi8> to vector<1x8x[4]xi8>
+// CHECK:           %[[MULI:.*]] = arith.muli %[[IN_1]], %[[FLT_1_B]] : vector<1x8x[4]xi8>
+// CHECK:           %[[ADDI:.*]] = arith.addi %[[MULI]], %[[OUT_1]] : vector<1x8x[4]xi8>
+// CHECK:           %[[OUT_INS:.*]] = vector.insert_strided_slice %[[ADDI]], %[[VEC_OUT]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<1x8x[4]xi8> into vector<1x8x[4]xi8>
+// CHECK:           %[[OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] : vector<1x8x[4]xi8>, tensor<1x8x?xi8> } : vector<1x8x[4]xi1> -> tensor<1x8x?xi8>
+// CHECK:           return %[[OUT]] : tensor<1x8x?xi8>
+
+// -----
+
+func.func @depthwise_conv1d_nwc_wc_3x5x4xf32_memref_dilation_2(
+      %input: memref<3x5x?xf32>,
+      %filter: memref<2x?xf32>,
+      %output: memref<3x2x?xf32>) {
+  linalg.depthwise_conv_1d_nwc_wc
+    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
+    ins(%input, %filter : memref<3x5x?xf32>, memref<2x?xf32>)
+    outs(%output : memref<3x2x?xf32>)
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.depthwise_conv_1d_nwc_wc"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [3, 2, [4], 2] : !transform.any_op
+    transform.yield
+  }
+}
+
+// CHECK-LABEL:   func.func @depthwise_conv1d_nwc_wc_3x5x4xf32_memref_dilation_2(
+// CHECK-SAME:      %[[INPUT:.*]]: memref<3x5x?xf32>,
+// CHECK-SAME:      %[[FILTER:.*]]: memref<2x?xf32>,
+// CHECK-SAME:      %[[OUTPUT:.*]]: memref<3x2x?xf32>) {
+
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[C2:.*]] = arith.constant 2 : index
+
+/// Create a mask for the input tensor
+// CHECK:           %[[CH_DIM_IN:.*]] = memref.dim %[[INPUT]], %[[C2]] : memref<3x5x?xf32>
+// CHECK:           %[[C3:.*]] = arith.constant 3 : index
+// CHECK:           %[[C5:.*]] = arith.constant 5 : index
+// CHECK:           %[[MASK_IN:.*]] = vector.create_mask %[[C3]], %[[C5]], %[[CH_DIM_IN]] : vector<3x4x[4]xi1>
+/// Read the input tensor
+// CHECK:           %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] : memref<3x5x?xf32>, vector<3x4x[4]xf32> } : vector<3x4x[4]xi1> -> vector<3x4x[4]xf32>
+
+/// Create a mask for the filter tensor
+// CHECK:           %[[CH_DIM_FLT:.*]] = memref.dim %[[FILTER]], %[[C1]] : memref<2x?xf32>
+// CHECK:           %[[MASK_FLT:.*]] = vector.create_mask %[[C2]], %[[CH_DIM_FLT]] : vector<2x[4]xi1>
+/// Read the filter tensor
+// CHECK:           %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] : memref<2x?xf32>, vector<2x[4]xf32> } : vector<2x[4]xi1> -> vector<2x[4]xf32>
+
+/// Create a mask for the output tensor
+// CHECK:           %[[CH_DIM_OUT:.*]] = memref.dim %[[OUTPUT]], %[[C2]] : memref<3x2x?xf32>
+// CHECK:           %[[MASK_OUT:.*]] = vector.create_mask %[[C3]], %[[C2]], %[[CH_DIM_OUT]] : vector<3x2x[4]xi1>
+/// Read the output tensor
+// CHECK:           %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] : memref<3x2x?xf32>, vector<3x2x[4]xf32> } : vector<3x2x[4]xi1> -> vector<3x2x[4]xf32>
+
+/// Convolution
+// CHECK:           %[[IN_1:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 0, 0], sizes = [3, 2, 4], strides = [1, 1, 1]} : vector<3x4x[4]xf32> to vector<3x2x[4]xf32>
+// CHECK:           %[[IN_2:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 2, 0], sizes = [3, 2, 4], strides = [1, 1, 1]} : vector<3x4x[4]xf32> to vector<3x2x[4]xf32>
+// CHECK:           %[[FLT_1:.*]] = vector.extract %[[VEC_FLT]][0] : vector<[4]xf32> from vector<2x[4]xf32>
+// CHECK:           %[[FLT_2:.*]] = vector.extract %[[VEC_FLT]][1] : vector<[4]xf32> from vector<2x[4]xf32>
+// CHECK:           %[[OUT_1:.*]] = vector.extract_strided_slice %[[VEC_OUT]] {offsets = [0, 0, 0], sizes = [3, 2, 4], strides = [1, 1, 1]} : vector<3x2x[4]xf32> to vector<3x2x[4]xf32>
+// CHECK:           %[[FLT_1_B:.*]] = vector.broadcast %[[FLT_1]] : vector<[4]xf32> to vector<3x2x[4]xf32>
+// CHECK:           %[[FMA_1:.*]] = vector.fma %[[IN_1]], %[[FLT_1_B]], %[[OUT_1]] : vector<3x2x[4]xf32>
+// CHECK:           %[[FLT_2_B:.*]] = vector.broadcast %[[FLT_2]] : vector<[4]xf32> to vector<3x2x[4]xf32>
+// CHECK:           %[[FMA_2:.*]] = vector.fma %[[IN_2]], %[[FLT_2_B]], %[[FMA_1]] : vector<3x2x[4]xf32>
+// CHECK:           %[[OUT_INS:.*]] = vector.insert_strided_slice %[[FMA_2]], %[[VEC_OUT]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<3x2x[4]xf32> into vector<3x2x[4]xf32>
+// CHECK:           vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] : vector<3x2x[4]xf32>, memref<3x2x?xf32> } : vector<3x2x[4]xi1>
diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index 834a7dc..93ecd67 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -94,6 +94,20 @@ func.func @erf_vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   return %0 : vector<8xf32>
 }
 
+// CHECK-LABEL:   func @erf_scalable_vector(
+// CHECK-SAME:                     %[[arg0:.*]]: vector<[8]xf32>) -> vector<[8]xf32> {
+// CHECK:           %[[zero:.*]] = arith.constant dense<0.000000e+00> : vector<[8]xf32>
+// CHECK-NOT:       erf
+// CHECK-NOT:       vector<8xf32>
+// CHECK-COUNT-20:  select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           %[[res:.*]] = arith.select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           return %[[res]] : vector<[8]xf32>
+// CHECK:         }
+func.func @erf_scalable_vector(%arg0: vector<[8]xf32>) -> vector<[8]xf32> {
+  %0 = math.erf %arg0 : vector<[8]xf32>
+  return %0 : vector<[8]xf32>
+}
+
 // CHECK-LABEL:   func @exp_scalar(
 // CHECK-SAME:                     %[[VAL_0:.*]]: f32) -> f32 {
 // CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 5.000000e-01 : f32
@@ -151,6 +165,17 @@ func.func @exp_vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   return %0 : vector<8xf32>
 }
 
+// CHECK-LABEL:   func @exp_scalable_vector
+// CHECK-NOT:      math.exp
+// CHECK-NOT:      vector<8xf32>
+// CHECK-COUNT-46: vector<[8]x{{(i32)|(f32)}}>
+// CHECK-NOT:      vector<8xf32>
+// CHECK-NOT:      math.exp
+func.func @exp_scalable_vector(%arg0: vector<[8]xf32>) -> vector<[8]xf32> {
+  %0 = math.exp %arg0 : vector<[8]xf32>
+  return %0 : vector<[8]xf32>
+}
+
 // CHECK-LABEL:   func @expm1_scalar(
 // CHECK-SAME:                       %[[X:.*]]: f32) -> f32 {
 // CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f32
@@ -277,6 +302,22 @@ func.func @expm1_vector(%arg0: vector<8x8xf32>) -> vector<8x8xf32> {
   return %0 : vector<8x8xf32>
 }
 
+// CHECK-LABEL:   func @expm1_scalable_vector(
+// CHECK-SAME:                       %{{.*}}: vector<8x[8]xf32>) -> vector<8x[8]xf32> {
+// CHECK-NOT:       vector<8x8xf32>
+// CHECK-NOT:       exp
+// CHECK-NOT:       log
+// CHECK-NOT:       expm1
+// CHECK-COUNT-127: vector<8x[8]x{{(i32)|(f32)|(i1)}}>
+// CHECK-NOT:       vector<8x8xf32>
+// CHECK-NOT:       exp
+// CHECK-NOT:       log
+// CHECK-NOT:       expm1
+func.func @expm1_scalable_vector(%arg0: vector<8x[8]xf32>) -> vector<8x[8]xf32> {
+  %0 = math.expm1 %arg0 : vector<8x[8]xf32>
+  return %0 : vector<8x[8]xf32>
+}
+
 // CHECK-LABEL:   func @log_scalar(
 // CHECK-SAME:                             %[[X:.*]]: f32) -> f32 {
 // CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32
@@ -357,6 +398,18 @@ func.func @log_vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   return %0 : vector<8xf32>
 }
 
+// CHECK-LABEL:   func @log_scalable_vector(
+// CHECK-SAME:                     %{{.*}}: vector<[8]xf32>) -> vector<[8]xf32> {
+// CHECK:           %[[CST_LN2:.*]] = arith.constant dense<0.693147182> : vector<[8]xf32>
+// CHECK-COUNT-5:   select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           %[[VAL_71:.*]] = arith.select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           return %[[VAL_71]] : vector<[8]xf32>
+// CHECK:         }
+func.func @log_scalable_vector(%arg0: vector<[8]xf32>) -> vector<[8]xf32> {
+  %0 = math.log %arg0 : vector<[8]xf32>
+  return %0 : vector<[8]xf32>
+}
+
 // CHECK-LABEL:   func @log2_scalar(
 // CHECK-SAME:                      %[[VAL_0:.*]]: f32) -> f32 {
 // CHECK:           %[[CST_LOG2E:.*]] = arith.constant 1.44269502 : f32
@@ -381,6 +434,18 @@ func.func @log2_vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   return %0 : vector<8xf32>
 }
 
+// CHECK-LABEL:   func @log2_scalable_vector(
+// CHECK-SAME:                      %{{.*}}: vector<[8]xf32>) -> vector<[8]xf32> {
+// CHECK:           %[[CST_LOG2E:.*]] = arith.constant dense<1.44269502> : vector<[8]xf32>
+// CHECK-COUNT-5:   select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           %[[VAL_71:.*]] = arith.select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           return %[[VAL_71]] : vector<[8]xf32>
+// CHECK:         }
+func.func @log2_scalable_vector(%arg0: vector<[8]xf32>) -> vector<[8]xf32> {
+  %0 = math.log2 %arg0 : vector<[8]xf32>
+  return %0 : vector<[8]xf32>
+}
+
 // CHECK-LABEL:   func @log1p_scalar(
 // CHECK-SAME:                       %[[X:.*]]: f32) -> f32 {
 // CHECK:           %[[CST_ONE:.*]] = arith.constant 1.000000e+00 : f32
@@ -414,6 +479,17 @@ func.func @log1p_vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   return %0 : vector<8xf32>
 }
 
+// CHECK-LABEL:   func @log1p_scalable_vector(
+// CHECK-SAME:                       %[[VAL_0:.*]]: vector<[8]xf32>) -> vector<[8]xf32> {
+// CHECK:           %[[CST_ONE:.*]] = arith.constant dense<1.000000e+00> : vector<[8]xf32>
+// CHECK-COUNT-6:   select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           %[[VAL_79:.*]] = arith.select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           return %[[VAL_79]] : vector<[8]xf32>
+// CHECK:         }
+func.func @log1p_scalable_vector(%arg0: vector<[8]xf32>) -> vector<[8]xf32> {
+  %0 = math.log1p %arg0 : vector<[8]xf32>
+  return %0 : vector<[8]xf32>
+}
 
 // CHECK-LABEL:   func @tanh_scalar(
 // CHECK-SAME:                      %[[VAL_0:.*]]: f32) -> f32 {
@@ -470,6 +546,19 @@ func.func @tanh_vector(%arg0: vector<8xf32>) -> vector<8xf32> {
   return %0 : vector<8xf32>
 }
 
+// CHECK-LABEL:   func @tanh_scalable_vector(
+// CHECK-SAME:                      %[[VAL_0:.*]]: vector<[8]xf32>) -> vector<[8]xf32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant dense<-7.99881172> : vector<[8]xf32>
+// CHECK-NOT:       tanh
+// CHECK-COUNT-2:   select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           %[[VAL_33:.*]] = arith.select {{.*}} : vector<[8]xi1>, vector<[8]xf32>
+// CHECK:           return %[[VAL_33]] : vector<[8]xf32>
+// CHECK:         }
+func.func @tanh_scalable_vector(%arg0: vector<[8]xf32>) -> vector<[8]xf32> {
+  %0 = math.tanh %arg0 : vector<[8]xf32>
+  return %0 : vector<[8]xf32>
+}
+
 // We only approximate rsqrt for vectors and when the AVX2 option is enabled.
 // CHECK-LABEL:   func @rsqrt_scalar
 // AVX2-LABEL:    func @rsqrt_scalar
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dual_sparse_conv_2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dual_sparse_conv_2d.mlir
index 350b5b4..c645ca6 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dual_sparse_conv_2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dual_sparse_conv_2d.mlir
@@ -222,6 +222,7 @@ module {
     bufferization.dealloc_tensor %sparse_filter_CD : tensor<3x3xi32, #CDR>
     bufferization.dealloc_tensor %sparse_filter_CSC : tensor<3x3xi32, #CSC>
 
+    bufferization.dealloc_tensor %0 : tensor<6x6xi32>
     bufferization.dealloc_tensor %2 : tensor<6x6xi32, #DCSR>
     bufferization.dealloc_tensor %3 : tensor<6x6xi32, #CSR>
     bufferization.dealloc_tensor %4 : tensor<6x6xi32, #CDR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
index f8fb8fd..55d4cae 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
@@ -273,11 +273,14 @@ module {
     bufferization.dealloc_tensor %sparse_input_CSC : tensor<8x8xi32, #CSC>
     bufferization.dealloc_tensor %sparse_input_CD : tensor<8x8xi32, #CDR>
 
+    bufferization.dealloc_tensor %0 : tensor<6x6xi32>
     bufferization.dealloc_tensor %1 : tensor<6x6xi32, #DCSR>
     bufferization.dealloc_tensor %2 : tensor<6x6xi32, #DCSR>
     bufferization.dealloc_tensor %3 : tensor<6x6xi32, #CSR>
     bufferization.dealloc_tensor %4 : tensor<6x6xi32, #CDR>
     bufferization.dealloc_tensor %5 : tensor<6x6xi32, #CSC>
+    bufferization.dealloc_tensor %6 : tensor<6x6xi32>
+
     return
   }
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
index 9b05f9b..e145c454 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
@@ -233,6 +233,19 @@ module {
     // bufferization.dealloc_tensor %s2pp4 : tensor<2x?x?xf64, #Tensor4>
     // bufferization.dealloc_tensor %s2pp5 : tensor<2x?x?xf64, #Tensor5>
     // bufferization.dealloc_tensor %s2pp6 : tensor<2x?x?xf64, #Tensor6>
+
+    bufferization.dealloc_tensor %d2341 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d2342 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d2343 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d2344 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d2345 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d2346 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %dp344 : tensor<?x3x4xf64>
+    bufferization.dealloc_tensor %d2p45 : tensor<2x?x4xf64>
+    bufferization.dealloc_tensor %d23p6 : tensor<2x3x?xf64>
+    bufferization.dealloc_tensor %dp3p4 : tensor<?x3x?xf64>
+    bufferization.dealloc_tensor %dpp45 : tensor<?x?x4xf64>
+
     return
   }
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
index 0f9dfb9..12f8e34 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
@@ -114,12 +114,14 @@ module {
     call @dump(%d31) : (tensor<2x3x4xf64>) -> ()
 
     //
-    // Release sparse tensors.
+    // Release the resources.
     //
     bufferization.dealloc_tensor %t13 : tensor<2x3x4xf64, #Tensor3>
     bufferization.dealloc_tensor %t31 : tensor<2x3x4xf64, #Tensor1>
     bufferization.dealloc_tensor %s1 : tensor<2x3x4xf64, #Tensor1>
     bufferization.dealloc_tensor %s3 : tensor<2x3x4xf64, #Tensor3>
+    bufferization.dealloc_tensor %d13 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d31 : tensor<2x3x4xf64>
 
     return
   }
@@ -167,12 +169,14 @@ module {
     call @dump(%d31) : (tensor<2x3x4xf64>) -> ()
 
     //
-    // Release sparse tensors.
+    // Release the resources.
     //
     bufferization.dealloc_tensor %t13 : tensor<2x3x4xf64, #SingletonTensor3>
     bufferization.dealloc_tensor %t31 : tensor<2x3x4xf64, #SingletonTensor1>
     bufferization.dealloc_tensor %s1 : tensor<2x3x4xf64, #SingletonTensor1>
     bufferization.dealloc_tensor %s3 : tensor<2x3x4xf64, #SingletonTensor3>
+    bufferization.dealloc_tensor %d13 : tensor<2x3x4xf64>
+    bufferization.dealloc_tensor %d31 : tensor<2x3x4xf64>
 
     return
   }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
index aa1bd04..fe88362 100755
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
@@ -140,10 +140,19 @@ module {
     sparse_tensor.print %s1 : tensor<4x3x2xf32, #BatchedCSR>
     sparse_tensor.print %s2 : tensor<4x3x2xf32, #CSRDense>
 
-    // FIXME: doing this explicitly crashes runtime
-    // bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
-    // bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
-    // bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
+    // TODO: This check is no longer needed once the codegen path uses the
+    // buffer deallocation pass. "dealloc_tensor" turn into a no-op in the
+    // codegen path.
+    %has_runtime = sparse_tensor.has_runtime_library
+    scf.if %has_runtime {
+      // sparse_tensor.assemble copies buffers when running with the runtime
+      // library. Deallocations are needed not needed when running in codgen
+      // mode.
+      bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
+      bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
+      bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
+    }
+
     return
   }
 }
diff --git a/mlir/test/Target/Cpp/invalid.mlir b/mlir/test/Target/Cpp/invalid.mlir
index 552c04a..513371a 100644
--- a/mlir/test/Target/Cpp/invalid.mlir
+++ b/mlir/test/Target/Cpp/invalid.mlir
@@ -10,10 +10,10 @@ func.func @multiple_blocks() {
 
 // -----
 
-func.func @unsupported_std_op(%arg0: f64) -> f64 {
-  // expected-error@+1 {{'math.absf' op unable to find printer for op}}
-  %0 = math.absf %arg0 : f64
-  return %0 : f64
+func.func @unsupported_op(%arg0: i1) {
+  // expected-error@+1 {{'cf.assert' op unable to find printer for op}}
+  cf.assert %arg0, "assertion foo"
+  return
 }
 
 // -----
diff --git a/mlir/test/Target/Cpp/subscript.mlir b/mlir/test/Target/Cpp/subscript.mlir
new file mode 100644
index 0000000..a6c82df
--- /dev/null
+++ b/mlir/test/Target/Cpp/subscript.mlir
@@ -0,0 +1,32 @@
+// RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-cpp -declare-variables-at-top %s | FileCheck %s
+
+func.func @load_store(%arg0: !emitc.array<4x8xf32>, %arg1: !emitc.array<3x5xf32>, %arg2: index, %arg3: index) {
+  %0 = emitc.subscript %arg0[%arg2, %arg3] : <4x8xf32>, index, index
+  %1 = emitc.subscript %arg1[%arg2, %arg3] : <3x5xf32>, index, index
+  emitc.assign %0 : f32 to %1 : f32
+  return
+}
+// CHECK: void load_store(float [[ARR1:[^ ]*]][4][8], float [[ARR2:[^ ]*]][3][5],
+// CHECK-SAME:            size_t [[I:[^ ]*]], size_t [[J:[^ ]*]])
+// CHECK-NEXT: [[ARR2]][[[I]]][[[J]]] = [[ARR1]][[[I]]][[[J]]];
+
+emitc.func @func1(%arg0 : f32) {
+  emitc.return
+}
+
+emitc.func @call_arg(%arg0: !emitc.array<4x8xf32>, %i: i32, %j: i16,
+                     %k: i8) {
+  %0 = emitc.subscript %arg0[%i, %j] : <4x8xf32>, i32, i16
+  %1 = emitc.subscript %arg0[%j, %k] : <4x8xf32>, i16, i8
+
+  emitc.call @func1 (%0) : (f32) -> ()
+  emitc.call_opaque "func2" (%1) : (f32) -> ()
+  emitc.call_opaque "func3" (%0, %1) { args = [1 : index, 0 : index] } : (f32, f32) -> ()
+  emitc.return
+}
+// CHECK: void call_arg(float [[ARR1:[^ ]*]][4][8], int32_t [[I:[^ ]*]],
+// CHECK-SAME:          int16_t [[J:[^ ]*]], int8_t [[K:[^ ]*]])
+// CHECK-NEXT: func1([[ARR1]][[[I]]][[[J]]]);
+// CHECK-NEXT: func2([[ARR1]][[[J]]][[[K]]]);
+// CHECK-NEXT: func3([[ARR1]][[[J]]][[[K]]], [[ARR1]][[[I]]][[[J]]]);
diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll
index 9ef6580..9af40d8 100644
--- a/mlir/test/Target/LLVMIR/Import/debug-info.ll
+++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll
@@ -296,12 +296,17 @@ define void @class_method() {
   ret void, !dbg !9
 }
 
-; Verify the elements parameter is dropped due to the cyclic dependencies.
-; CHECK: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, name = "class_name", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial">
-; CHECK: #[[COMP_PTR:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]], sizeInBits = 64>
+; Verify the cyclic composite type is identified, even though conversion begins from the subprogram type.
+; CHECK: #[[COMP_SELF:.+]] = #llvm.di_composite_type<tag = DW_TAG_null, recId = [[REC_ID:.+]]>
+; CHECK: #[[COMP_PTR:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP_SELF]], sizeInBits = 64>
 ; CHECK: #[[SP_TYPE:.+]] = #llvm.di_subroutine_type<types = #{{.*}}, #[[COMP_PTR]]>
-; CHECK: #[[SP:.+]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #{{.*}}, scope = #[[COMP]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE]]>
-; CHECK: #[[LOC]] = loc(fused<#[[SP]]>
+; CHECK: #[[SP_INNER:.+]] = #llvm.di_subprogram<id = [[SP_ID:.+]], compileUnit = #{{.*}}, scope = #[[COMP_SELF]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE]]>
+; CHECK: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, recId = [[REC_ID]], name = "class_name", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial", elements = #[[SP_INNER]]>
+
+; CHECK: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]], sizeInBits = 64>
+; CHECK: #[[SP_TYPE_OUTER:.+]] = #llvm.di_subroutine_type<types = #{{.*}}, #[[COMP_PTR_OUTER]]>
+; CHECK: #[[SP_OUTER:.+]] = #llvm.di_subprogram<id = [[SP_ID]], compileUnit = #{{.*}}, scope = #[[COMP]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE_OUTER]]>
+; CHECK: #[[LOC]] = loc(fused<#[[SP_OUTER]]>
 
 !llvm.dbg.cu = !{!1}
 !llvm.module.flags = !{!0}
@@ -318,15 +323,18 @@ define void @class_method() {
 
 ; // -----
 
-; Verify the elements parameter is dropped due to the cyclic dependencies.
-; CHECK: #[[$COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, name = "class_field", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial">
-; CHECK: #[[$COMP_PTR:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[$COMP]]>
-; CHECK: #[[$VAR0:.+]] = #llvm.di_local_variable<scope = #{{.*}}, name = "class_field", file = #{{.*}}, type = #[[$COMP_PTR]]>
+; Verify the cyclic composite type is handled correctly.
+; CHECK: #[[COMP_SELF:.+]] = #llvm.di_composite_type<tag = DW_TAG_null, recId = [[REC_ID:.+]]>
+; CHECK: #[[COMP_PTR_INNER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP_SELF]]>
+; CHECK: #[[FIELD:.+]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "call_field", baseType = #[[COMP_PTR_INNER]]>
+; CHECK: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, recId = [[REC_ID]], name = "class_field", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial", elements = #[[FIELD]]>
+; CHECK: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]]>
+; CHECK: #[[VAR0:.+]] = #llvm.di_local_variable<scope = #{{.*}}, name = "class_field", file = #{{.*}}, type = #[[COMP_PTR_OUTER]]>
 
-; CHECK-LABEL: @class_field
+; CHECK: @class_field
 ; CHECK-SAME:  %[[ARG0:[a-zA-Z0-9]+]]
 define void @class_field(ptr %arg1) {
-  ; CHECK: llvm.intr.dbg.value #[[$VAR0]] = %[[ARG0]] : !llvm.ptr
+  ; CHECK: llvm.intr.dbg.value #[[VAR0]] = %[[ARG0]] : !llvm.ptr
   call void @llvm.dbg.value(metadata ptr %arg1, metadata !7, metadata !DIExpression()), !dbg !9
   ret void
 }
@@ -563,35 +571,6 @@ define void @func_in_module(ptr %arg) !dbg !8 {
 
 ; // -----
 
-; Verifies that array types that have an unimportable base type are removed to
-; avoid producing invalid IR.
-; CHECK: #[[DI_LOCAL_VAR:.+]] = #llvm.di_local_variable<
-; CHECK-NOT: type =
-
-; CHECK-LABEL: @array_with_cyclic_base_type
-define i32 @array_with_cyclic_base_type(ptr %0) !dbg !3 {
-  call void @llvm.dbg.value(metadata ptr %0, metadata !4, metadata !DIExpression()), !dbg !7
-  ret i32 0
-}
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare void @llvm.dbg.value(metadata, metadata, metadata)
-
-
-!llvm.module.flags = !{!0}
-!llvm.dbg.cu = !{!1}
-
-!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2)
-!2 = !DIFile(filename: "debug-info.ll", directory: "/")
-!3 = distinct !DISubprogram(name: "func", scope: !2, file: !2, line: 46, scopeLine: 48, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
-!4 = !DILocalVariable(name: "op", arg: 5, scope: !3, file: !2, line: 47, type: !5)
-!5 = !DICompositeType(tag: DW_TAG_array_type, size: 42, baseType: !6)
-!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5)
-!7 = !DILocation(line: 0, scope: !3)
-
-; // -----
-
 ; Verifies that import compile units respect the distinctness of the input.
 ; CHECK-LABEL: @distinct_cu_func0
 define void @distinct_cu_func0() !dbg !4 {
diff --git a/mlir/test/Target/LLVMIR/Import/import-failure.ll b/mlir/test/Target/LLVMIR/Import/import-failure.ll
index 9a4e939..3f4efab 100644
--- a/mlir/test/Target/LLVMIR/Import/import-failure.ll
+++ b/mlir/test/Target/LLVMIR/Import/import-failure.ll
@@ -85,38 +85,6 @@ define void @unsupported_argument(i64 %arg1) {
 
 ; // -----
 
-; Check that debug intrinsics that depend on cyclic metadata are dropped.
-
-declare void @llvm.dbg.value(metadata, metadata, metadata)
-
-; CHECK:      import-failure.ll
-; CHECK-SAME: warning: dropped instruction: call void @llvm.dbg.label(metadata !{{.*}})
-; CHECK:      import-failure.ll
-; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata i64 %{{.*}}, metadata !3, metadata !DIExpression())
-define void @cylic_metadata(i64 %arg1) {
-  call void @llvm.dbg.value(metadata i64 %arg1, metadata !10, metadata !DIExpression()), !dbg !14
-  call void @llvm.dbg.label(metadata !13), !dbg !14
-  ret void
-}
-
-!llvm.dbg.cu = !{!1}
-!llvm.module.flags = !{!0}
-!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2)
-!2 = !DIFile(filename: "import-failure.ll", directory: "/")
-!3 = !DICompositeType(tag: DW_TAG_array_type, size: 42, baseType: !4)
-!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3)
-!5 = distinct !DISubprogram(name: "class_method", scope: !2, file: !2, type: !6, spFlags: DISPFlagDefinition, unit: !1)
-!6 = !DISubroutineType(types: !7)
-!7 = !{!3}
-!10 = !DILocalVariable(scope: !5, name: "arg1", file: !2, line: 1, arg: 1, align: 64);
-!11 = !DILexicalBlock(scope: !5)
-!12 = !DILexicalBlockFile(scope: !11, discriminator: 0)
-!13 = !DILabel(scope: !12, name: "label", file: !2, line: 42)
-!14 = !DILocation(line: 1, column: 2, scope: !5)
-
-; // -----
-
 ; global_dtors with non-null data fields cannot be represented in MLIR.
 ; CHECK:      <unknown>
 ; CHECK-SAME: error: unhandled global variable: @llvm.global_dtors
diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir
index cfd5239..5d70bff 100644
--- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir
@@ -342,3 +342,63 @@ llvm.func @func_line_tables() {
 llvm.func @func_debug_directives() {
   llvm.return
 } loc(fused<#di_subprogram_2>["foo2.mlir":0:0])
+
+// -----
+
+// Ensure recursive types with multiple external references work.
+
+// Common base nodes.
+#di_file = #llvm.di_file<"test.mlir" in "/">
+#di_null_type = #llvm.di_null_type
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[1]<>, sourceLanguage = DW_LANG_C, file = #di_file, isOptimized = false, emissionKind = None>
+
+// Recursive type itself.
+#di_struct_self = #llvm.di_composite_type<tag = DW_TAG_null, recId = distinct[0]<>>
+#di_ptr_inner = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #di_struct_self, sizeInBits = 64>
+#di_subroutine_inner = #llvm.di_subroutine_type<types = #di_null_type, #di_ptr_inner>
+#di_subprogram_inner = #llvm.di_subprogram<
+  id = distinct[2]<>,
+  compileUnit = #di_compile_unit,
+  scope = #di_struct_self,
+  name = "class_method",
+  file = #di_file,
+  subprogramFlags = Definition,
+  type = #di_subroutine_inner>
+#di_struct = #llvm.di_composite_type<
+  tag = DW_TAG_class_type,
+  recId = distinct[0]<>,
+  name = "class_name",
+  file = #di_file,
+  line = 42,
+  flags = "TypePassByReference|NonTrivial",
+  elements = #di_subprogram_inner>
+
+// Outer types referencing the entire recursive type.
+#di_ptr_outer = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #di_struct, sizeInBits = 64>
+#di_subroutine_outer = #llvm.di_subroutine_type<types = #di_null_type, #di_ptr_outer>
+#di_subprogram_outer = #llvm.di_subprogram<
+  id = distinct[2]<>,
+  compileUnit = #di_compile_unit,
+  scope = #di_struct,
+  name = "class_method",
+  file = #di_file,
+  subprogramFlags = Definition,
+  type = #di_subroutine_outer>
+
+#loc3 = loc(fused<#di_subprogram_outer>["test.mlir":1:1])
+
+// CHECK: @class_method
+// CHECK: ret void, !dbg ![[LOC:.*]]
+
+// CHECK: ![[CU:.*]] = distinct !DICompileUnit(
+// CHECK: ![[SP:.*]] = distinct !DISubprogram(name: "class_method", scope: ![[STRUCT:.*]], file: !{{.*}}, type: ![[SUBROUTINE:.*]], spFlags: DISPFlagDefinition, unit: ![[CU]])
+// CHECK: ![[STRUCT]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "class_name", {{.*}}, elements: ![[ELEMS:.*]])
+// CHECK: ![[ELEMS]] = !{![[SP]]}
+// CHECK: ![[SUBROUTINE]] = !DISubroutineType(types: ![[SUBROUTINE_ELEMS:.*]])
+// CHECK: ![[SUBROUTINE_ELEMS]] = !{null, ![[PTR:.*]]}
+// CHECK: ![[PTR]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[STRUCT]], size: 64)
+// CHECK: ![[LOC]] = !DILocation(line: 1, column: 1, scope: ![[SP]])
+
+llvm.func @class_method() {
+  llvm.return loc(#loc3)
+} loc(#loc3)
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 93550f5..ce6b56d 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -88,13 +88,20 @@ llvm.func @rocdl.bpermute(%src : i32) -> i32 {
   llvm.return %0 : i32
 }
 
-llvm.func @rocdl.ballot(%pred : i1) -> i32 {
-  // CHECK-LABEL: rocdl.ballot
+llvm.func @rocdl.ballot32(%pred : i1) -> i32 {
+  // CHECK-LABEL: rocdl.ballot32
   // CHECK: call i32 @llvm.amdgcn.ballot
   %0 = rocdl.ballot %pred : i32
   llvm.return %0 : i32
 }
 
+llvm.func @rocdl.ballot64(%pred : i1) -> i64 {
+  // CHECK-LABEL: rocdl.ballot64
+  // CHECK: call i64 @llvm.amdgcn.ballot
+  %0 = rocdl.ballot %pred : i64
+  llvm.return %0 : i64
+}
+
 llvm.func @rocdl.waitcnt() {
   // CHECK-LABEL: rocdl.waitcnt
   // CHECK-NEXT: call void @llvm.amdgcn.s.waitcnt(i32 0)
diff --git a/mlir/test/Transforms/buffer-results-to-out-params-add-result-attr.mlir b/mlir/test/Transforms/buffer-results-to-out-params-add-result-attr.mlir
new file mode 100644
index 0000000..f4a95c7
--- /dev/null
+++ b/mlir/test/Transforms/buffer-results-to-out-params-add-result-attr.mlir
@@ -0,0 +1,17 @@
+// RUN: mlir-opt -p 'builtin.module(buffer-results-to-out-params{add-result-attr})' -split-input-file -verify-diagnostics %s | FileCheck %s
+
+// CHECK-LABEL: @basic({{.*}}: memref<f32> {bufferize.result})
+func.func @basic() -> (memref<f32>) {
+  %0 = "test.source"() : () -> (memref<f32>)
+  return %0 : memref<f32>
+}
+
+// -----
+
+// CHECK-LABEL: multiple_results
+// CHECK-SAME:  memref<1xf32> {bufferize.result}
+// CHECK-SAME:  memref<2xf32> {bufferize.result}
+func.func @multiple_results() -> (memref<1xf32>, memref<2xf32>) {
+  %0, %1 = "test.source"() : () -> (memref<1xf32>, memref<2xf32>)
+  return %0, %1 : memref<1xf32>, memref<2xf32>
+}
diff --git a/mlir/test/mlir-opt/nearmiss.mlir b/mlir/test/mlir-opt/split-markers.mlir
index 2f69551..665a37f 100644
--- a/mlir/test/mlir-opt/nearmiss.mlir
+++ b/mlir/test/mlir-opt/split-markers.mlir
@@ -1,6 +1,13 @@
-// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t &&  FileCheck --input-file %t %s
+// Check near-miss mechanics:
+// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t \
+// RUN: &&  FileCheck --input-file %t %s
 // RUN: cat %t
 
+// Check that (1) custom input splitter and (2) custom output splitters work.
+// RUN: mlir-opt %s -split-input-file="// CHECK: ""----" \
+// RUN:   -output-split-marker="// ---- next split ----" \
+// RUN: | FileCheck --check-prefix=CHECK-SPLITTERS %s
+
 func.func @main() {return}
 
 // -----
@@ -20,3 +27,9 @@ func.func @bar2() {return }
 
 // No error flagged at the end for a near miss.
 // ----
+
+// CHECK-SPLITTERS: module
+// CHECK-SPLITTERS: ---- next split ----
+// CHECK-SPLITTERS: module
+// CHECK-SPLITTERS: ---- next split ----
+// CHECK-SPLITTERS: module
diff --git a/mlir/test/mlir-pdll/split-markers.pdll b/mlir/test/mlir-pdll/split-markers.pdll
new file mode 100644
index 0000000..45e409a
--- /dev/null
+++ b/mlir/test/mlir-pdll/split-markers.pdll
@@ -0,0 +1,36 @@
+// Check that (1) the default input split marker used if no custom marker is
+// specified and (2) the output file is merged using the default marker.
+// RUN: mlir-pdll %s -split-input-file \
+// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s
+
+// Check that the custom (3) input and (output) split markers are used if
+// provided.
+// RUN: mlir-pdll %s \
+// RUN:   -split-input-file="// ""=====" -output-split-marker "// #####" \
+// RUN: | FileCheck -check-prefix=CHECK-CUSTOM %s
+
+// CHECK-DEFAULT:      Module
+// CHECK-DEFAULT-NEXT: PatternDecl
+// CHECK-DEFAULT-NOT:  PatternDecl
+// CHECK-DEFAULT:      //{{ }}-----
+// CHECK-DEFAULT-NEXT: Module
+// CHECK-DEFAULT-NEXT: PatternDecl
+// CHECK-DEFAULT:      PatternDecl
+
+// CHECK-CUSTOM:      Module
+// CHECK-CUSTOM-NEXT: PatternDecl
+// CHECK-CUSTOM:      PatternDecl
+// CHECK-CUSTOM:      // #####
+// CHECK-CUSTOM-NEXT: Module
+// CHECK-CUSTOM-NEXT: PatternDecl
+// CHECK-CUSTOM-NOT:  PatternDecl
+
+Pattern => erase op<test.op>;
+
+// -----
+
+Pattern => erase op<test.op2>;
+
+// =====
+
+Pattern => erase op<test.op3>;
diff --git a/mlir/test/mlir-translate/split-markers.mlir b/mlir/test/mlir-translate/split-markers.mlir
new file mode 100644
index 0000000..ed576bc
--- /dev/null
+++ b/mlir/test/mlir-translate/split-markers.mlir
@@ -0,0 +1,35 @@
+// Check that (1) the output split marker is inserted and (2) the input file is
+// split using the default split marker.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
+
+// With the second command, check that (3) the input split marker is used and
+// (4) the output split marker is empty if not specified.
+// RUN: mlir-translate %s -split-input-file="// ""-----" -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | mlir-translate -split-input-file -import-llvm \
+// RUN:   -split-input-file="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
+
+// Check that (5) the input is not split if `-split-input-file` is not given.
+// RUN: mlir-translate %s -mlir-to-llvmir \
+// RUN: | FileCheck -check-prefix=CHECK-NOSPLIT %s
+
+// CHECK-OUTPUT:      ModuleID
+// CHECK-OUTPUT:      ; -----
+// CHECK-OUTPUT-NEXT: ModuleID
+
+// CHECK-ROUNDTRIP:       module {{.*}} {
+// CHECK-ROUNDTRIP-NEXT:  }
+// CHECK-ROUNDTRIP-EMPTY:
+// CHECK-ROUNDTRIP:       module
+
+// CHECK-NOSPLIT:     ModuleID
+// CHECK-NOSPLIT-NOT: ModuleID
+
+module {}
+
+// -----
+
+module {}
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index 43e4aa5..d312765e 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -136,11 +136,20 @@ int main(int argc, char **argv) {
       llvm::cl::desc(
           "Print out the parsed ODS information from the input file"),
       llvm::cl::init(false));
-  llvm::cl::opt<bool> splitInputFile(
-      "split-input-file",
-      llvm::cl::desc("Split the input file into pieces and process each "
-                     "chunk independently"),
-      llvm::cl::init(false));
+  llvm::cl::opt<std::string> inputSplitMarker(
+      "split-input-file", llvm::cl::ValueOptional,
+      llvm::cl::callback([&](const std::string &str) {
+        // Implicit value: use default marker if flag was used without value.
+        if (str.empty())
+          inputSplitMarker.setValue(kDefaultSplitMarker);
+      }),
+      llvm::cl::desc("Split the input file into chunks using the given or "
+                     "default marker and process each chunk independently"),
+      llvm::cl::init(""));
+  llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for merging the ouput"),
+      llvm::cl::init(kDefaultSplitMarker));
   llvm::cl::opt<enum OutputType> outputType(
       "x", llvm::cl::init(OutputType::AST),
       llvm::cl::desc("The type of output desired"),
@@ -187,7 +196,7 @@ int main(int argc, char **argv) {
                          dumpODS, includedFiles);
   };
   if (failed(splitAndProcessBuffer(std::move(inputFile), processFn, outputStrOS,
-                                   splitInputFile)))
+                                   inputSplitMarker, outputSplitMarker)))
     return 1;
 
   // Write the output.
diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt
index a74eff0..b382137 100644
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@@ -56,6 +56,8 @@ set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-L
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} s390x-ibm-linux-gnu")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} s390x-ibm-linux-gnu-LTO")
 
 # Once the plugins for the different targets are validated, they will be added to
 # the list of supported targets in the current system.
diff --git a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
index 3ca0236..b6fc136 100644
--- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@@ -12,12 +12,15 @@
 
 add_subdirectory(common)
 
-# void build_generic_elf64(string tmachine, string tmachine_name, string tmachine_libname, string elf_machine_id);
+# void build_generic_elf64(string tmachine, string tmachine_name, string tmachine_libname,
+#                          string tmachine_llvm, string tmachine_triple, string elf_machine_id);
 # - build a plugin for an ELF based generic 64-bit target based on libffi.
 # - tmachine: name of the machine processor as used in the cmake build system.
 # - tmachine_name: name of the machine to be printed with the debug messages.
 # - tmachine_libname: machine name to be appended to the plugin library name.
-macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
+# - tmachine_llvm: LLVM triple for the processor
+# - tmachine_triple: GNU target triple
+macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_llvm tmachine_triple elf_machine_id)
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
   # Define macro to be used as prefix of the runtime messages for this target.
   add_definitions("-DTARGET_NAME=${tmachine_name}")
@@ -30,7 +33,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
   add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
 
   # Define target triple
-  add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
+  add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine_llvm}")
 
   add_llvm_library("omptarget.rtl.${tmachine_libname}"
     SHARED
diff --git a/openmp/libomptarget/plugins-nextgen/aarch64/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/aarch64/CMakeLists.txt
index 2c2b753..663ab4d 100644
--- a/openmp/libomptarget/plugins-nextgen/aarch64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/aarch64/CMakeLists.txt
@@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
+  build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
 else()
  libomptarget_say("Not building aarch64 NextGen offloading plugin: machine not found in the system.")
 endif()
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
index 5c76799..829b4b7 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/GlobalHandler.h
@@ -104,7 +104,7 @@ public:
   virtual ~GenericGlobalHandlerTy() {}
 
   /// Helper function for getting an ELF from a device image.
-  Expected<ELF64LEObjectFile> getELFObjectFile(DeviceImageTy &Image);
+  Expected<std::unique_ptr<ObjectFile>> getELFObjectFile(DeviceImageTy &Image);
 
   /// Returns whether the symbol named \p SymName is present in the given \p
   /// Image.
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h b/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
index 140a6b6..88c83d3 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
@@ -28,17 +28,15 @@ bool isELF(llvm::StringRef Buffer);
 llvm::Expected<bool> checkMachine(llvm::StringRef Object, uint16_t EMachine);
 
 /// Returns a pointer to the given \p Symbol inside of an ELF object.
-llvm::Expected<const void *> getSymbolAddress(
-    const llvm::object::ELFObjectFile<llvm::object::ELF64LE> &ELFObj,
-    const llvm::object::ELF64LE::Sym &Symbol);
+llvm::Expected<const void *>
+getSymbolAddress(const llvm::object::ELFSymbolRef &Symbol);
 
 /// Returns the symbol associated with the \p Name in the \p ELFObj. It will
 /// first search for the hash sections to identify symbols from the hash table.
 /// If that fails it will fall back to a linear search in the case of an
 /// executable file without a hash table.
-llvm::Expected<const typename llvm::object::ELF64LE::Sym *>
-getSymbol(const llvm::object::ELFObjectFile<llvm::object::ELF64LE> &ELFObj,
-          llvm::StringRef Name);
+llvm::Expected<std::optional<llvm::object::ELFSymbolRef>>
+getSymbol(const llvm::object::ObjectFile &ELFObj, llvm::StringRef Name);
 
 } // namespace elf
 } // namespace utils
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
index d398f60..ba0aa47 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -25,14 +25,12 @@ using namespace omp;
 using namespace target;
 using namespace plugin;
 
-Expected<ELF64LEObjectFile>
+Expected<std::unique_ptr<ObjectFile>>
 GenericGlobalHandlerTy::getELFObjectFile(DeviceImageTy &Image) {
   assert(utils::elf::isELF(Image.getMemoryBuffer().getBuffer()) &&
          "Input is not an ELF file");
 
-  Expected<ELF64LEObjectFile> ElfOrErr =
-      ELF64LEObjectFile::create(Image.getMemoryBuffer());
-  return ElfOrErr;
+  return ELFObjectFileBase::createELFObjectFile(Image.getMemoryBuffer());
 }
 
 Error GenericGlobalHandlerTy::moveGlobalBetweenDeviceAndHost(
@@ -91,13 +89,13 @@ bool GenericGlobalHandlerTy::isSymbolInImage(GenericDeviceTy &Device,
   }
 
   // Search the ELF symbol using the symbol name.
-  auto SymOrErr = utils::elf::getSymbol(*ELFObjOrErr, SymName);
+  auto SymOrErr = utils::elf::getSymbol(**ELFObjOrErr, SymName);
   if (!SymOrErr) {
     consumeError(SymOrErr.takeError());
     return false;
   }
 
-  return *SymOrErr;
+  return SymOrErr->has_value();
 }
 
 Error GenericGlobalHandlerTy::getGlobalMetadataFromImage(
@@ -110,17 +108,17 @@ Error GenericGlobalHandlerTy::getGlobalMetadataFromImage(
     return ELFObj.takeError();
 
   // Search the ELF symbol using the symbol name.
-  auto SymOrErr = utils::elf::getSymbol(*ELFObj, ImageGlobal.getName());
+  auto SymOrErr = utils::elf::getSymbol(**ELFObj, ImageGlobal.getName());
   if (!SymOrErr)
     return Plugin::error("Failed ELF lookup of global '%s': %s",
                          ImageGlobal.getName().data(),
                          toString(SymOrErr.takeError()).data());
 
-  if (!*SymOrErr)
+  if (!SymOrErr->has_value())
     return Plugin::error("Failed to find global symbol '%s' in the ELF image",
                          ImageGlobal.getName().data());
 
-  auto AddrOrErr = utils::elf::getSymbolAddress(*ELFObj, **SymOrErr);
+  auto AddrOrErr = utils::elf::getSymbolAddress(**SymOrErr);
   // Get the section to which the symbol belongs.
   if (!AddrOrErr)
     return Plugin::error("Failed to get ELF symbol from global '%s': %s",
@@ -129,7 +127,7 @@ Error GenericGlobalHandlerTy::getGlobalMetadataFromImage(
 
   // Setup the global symbol's address and size.
   ImageGlobal.setPtr(const_cast<void *>(*AddrOrErr));
-  ImageGlobal.setSize((*SymOrErr)->st_size);
+  ImageGlobal.setSize((*SymOrErr)->getSize());
 
   return Plugin::success();
 }
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp b/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
index c84c3ba..2ae97f0 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -36,18 +36,10 @@ bool utils::elf::isELF(StringRef Buffer) {
   }
 }
 
-Expected<bool> utils::elf::checkMachine(StringRef Object, uint16_t EMachine) {
-  assert(isELF(Object) && "Input is not an ELF!");
-
-  Expected<ELF64LEObjectFile> ElfOrErr =
-      ELF64LEObjectFile::create(MemoryBufferRef(Object, /*Identifier=*/""),
-                                /*InitContent=*/false);
-  if (!ElfOrErr)
-    return ElfOrErr.takeError();
-
-  const auto Header = ElfOrErr->getELFFile().getHeader();
-  if (Header.e_ident[EI_CLASS] != ELFCLASS64)
-    return createError("Only 64-bit ELF files are supported");
+template <class ELFT>
+static Expected<bool>
+checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
+  const auto Header = ELFObj.getELFFile().getHeader();
   if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
     return createError("Only executable ELF files are supported");
 
@@ -71,6 +63,25 @@ Expected<bool> utils::elf::checkMachine(StringRef Object, uint16_t EMachine) {
   return Header.e_machine == EMachine;
 }
 
+Expected<bool> utils::elf::checkMachine(StringRef Object, uint16_t EMachine) {
+  assert(isELF(Object) && "Input is not an ELF!");
+
+  Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
+      ObjectFile::createELFObjectFile(
+          MemoryBufferRef(Object, /*Identifier=*/""),
+          /*InitContent=*/false);
+  if (!ElfOrErr)
+    return ElfOrErr.takeError();
+
+  if (const ELF64LEObjectFile *ELFObj =
+          dyn_cast<ELF64LEObjectFile>(&**ElfOrErr))
+    return checkMachineImpl(*ELFObj, EMachine);
+  if (const ELF64BEObjectFile *ELFObj =
+          dyn_cast<ELF64BEObjectFile>(&**ElfOrErr))
+    return checkMachineImpl(*ELFObj, EMachine);
+  return createError("Only 64-bit ELF files are supported");
+}
+
 template <class ELFT>
 static Expected<const typename ELFT::Sym *>
 getSymbolFromGnuHashTable(StringRef Name, const typename ELFT::GnuHash &HashTab,
@@ -138,9 +149,10 @@ getSymbolFromSysVHashTable(StringRef Name, const typename ELFT::Hash &HashTab,
 }
 
 template <class ELFT>
-static Expected<const typename ELFT::Sym *>
-getHashTableSymbol(const ELFFile<ELFT> &Elf, const typename ELFT::Shdr &Sec,
-                   StringRef Name) {
+static Expected<std::optional<ELFSymbolRef>>
+getHashTableSymbol(const ELFObjectFile<ELFT> &ELFObj,
+                   const typename ELFT::Shdr &Sec, StringRef Name) {
+  const ELFFile<ELFT> &Elf = ELFObj.getELFFile();
   if (Sec.sh_type != ELF::SHT_HASH && Sec.sh_type != ELF::SHT_GNU_HASH)
     return createError(
         "invalid sh_type for hash table, expected SHT_HASH or SHT_GNU_HASH");
@@ -179,7 +191,12 @@ getHashTableSymbol(const ELFFile<ELFT> &Elf, const typename ELFT::Shdr &Sec,
                 sizeof(typename ELFT::Word) * HashTab->nbuckets +
                 sizeof(typename ELFT::Word) * (SymTab.size() - HashTab->symndx))
       return createError("section has invalid sh_size: " + Twine(Sec.sh_size));
-    return getSymbolFromGnuHashTable<ELFT>(Name, *HashTab, SymTab, StrTab);
+    auto Sym = getSymbolFromGnuHashTable<ELFT>(Name, *HashTab, SymTab, StrTab);
+    if (!Sym)
+      return Sym.takeError();
+    if (!*Sym)
+      return std::nullopt;
+    return ELFObj.toSymbolRef(*SymTabOrErr, *Sym - &SymTab[0]);
   }
 
   // If this is a Sys-V hash table we verify its size and search the symbol
@@ -197,16 +214,22 @@ getHashTableSymbol(const ELFFile<ELFT> &Elf, const typename ELFT::Shdr &Sec,
                           sizeof(typename ELFT::Word) * HashTab->nchain)
       return createError("section has invalid sh_size: " + Twine(Sec.sh_size));
 
-    return getSymbolFromSysVHashTable<ELFT>(Name, *HashTab, SymTab, StrTab);
+    auto Sym = getSymbolFromSysVHashTable<ELFT>(Name, *HashTab, SymTab, StrTab);
+    if (!Sym)
+      return Sym.takeError();
+    if (!*Sym)
+      return std::nullopt;
+    return ELFObj.toSymbolRef(*SymTabOrErr, *Sym - &SymTab[0]);
   }
 
-  return nullptr;
+  return std::nullopt;
 }
 
 template <class ELFT>
-static Expected<const typename ELFT::Sym *>
-getSymTableSymbol(const ELFFile<ELFT> &Elf, const typename ELFT::Shdr &Sec,
-                  StringRef Name) {
+static Expected<std::optional<ELFSymbolRef>>
+getSymTableSymbol(const ELFObjectFile<ELFT> &ELFObj,
+                  const typename ELFT::Shdr &Sec, StringRef Name) {
+  const ELFFile<ELFT> &Elf = ELFObj.getELFFile();
   if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM)
     return createError(
         "invalid sh_type for hash table, expected SHT_SYMTAB or SHT_DYNSYM");
@@ -226,13 +249,14 @@ getSymTableSymbol(const ELFFile<ELFT> &Elf, const typename ELFT::Shdr &Sec,
 
   for (const typename ELFT::Sym &Sym : SymTab)
     if (StrTab.drop_front(Sym.st_name).data() == Name)
-      return &Sym;
+      return ELFObj.toSymbolRef(&Sec, &Sym - &SymTab[0]);
 
-  return nullptr;
+  return std::nullopt;
 }
 
-Expected<const typename ELF64LE::Sym *>
-utils::elf::getSymbol(const ELFObjectFile<ELF64LE> &ELFObj, StringRef Name) {
+template <class ELFT>
+static Expected<std::optional<ELFSymbolRef>>
+getSymbolImpl(const ELFObjectFile<ELFT> &ELFObj, StringRef Name) {
   // First try to look up the symbol via the hash table.
   for (ELFSectionRef Sec : ELFObj.sections()) {
     if (Sec.getType() != SHT_HASH && Sec.getType() != SHT_GNU_HASH)
@@ -241,8 +265,7 @@ utils::elf::getSymbol(const ELFObjectFile<ELF64LE> &ELFObj, StringRef Name) {
     auto HashTabOrErr = ELFObj.getELFFile().getSection(Sec.getIndex());
     if (!HashTabOrErr)
       return HashTabOrErr.takeError();
-    return getHashTableSymbol<ELF64LE>(ELFObj.getELFFile(), **HashTabOrErr,
-                                       Name);
+    return getHashTableSymbol<ELFT>(ELFObj, **HashTabOrErr, Name);
   }
 
   // If this is an executable file check the entire standard symbol table.
@@ -253,16 +276,31 @@ utils::elf::getSymbol(const ELFObjectFile<ELF64LE> &ELFObj, StringRef Name) {
     auto SymTabOrErr = ELFObj.getELFFile().getSection(Sec.getIndex());
     if (!SymTabOrErr)
       return SymTabOrErr.takeError();
-    return getSymTableSymbol<ELF64LE>(ELFObj.getELFFile(), **SymTabOrErr, Name);
+    return getSymTableSymbol<ELFT>(ELFObj, **SymTabOrErr, Name);
   }
 
-  return nullptr;
+  return std::nullopt;
 }
 
-Expected<const void *> utils::elf::getSymbolAddress(
-    const object::ELFObjectFile<object::ELF64LE> &ELFObj,
-    const object::ELF64LE::Sym &Symbol) {
-  const ELFFile<ELF64LE> &ELFFile = ELFObj.getELFFile();
+Expected<std::optional<ELFSymbolRef>>
+utils::elf::getSymbol(const ObjectFile &Obj, StringRef Name) {
+  if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(&Obj))
+    return getSymbolImpl(*ELFObj, Name);
+  if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(&Obj))
+    return getSymbolImpl(*ELFObj, Name);
+  return createError("Only 64-bit ELF files are supported");
+}
+
+template <class ELFT>
+static Expected<const void *>
+getSymbolAddressImpl(const ELFObjectFile<ELFT> &ELFObj,
+                     const ELFSymbolRef &SymRef) {
+  const ELFFile<ELFT> &ELFFile = ELFObj.getELFFile();
+
+  auto SymOrErr = ELFObj.getSymbol(SymRef.getRawDataRefImpl());
+  if (!SymOrErr)
+    return SymOrErr.takeError();
+  const auto &Symbol = **SymOrErr;
 
   auto SecOrErr = ELFFile.getSection(Symbol.st_shndx);
   if (!SecOrErr)
@@ -283,3 +321,13 @@ Expected<const void *> utils::elf::getSymbolAddress(
 
   return ELFFile.base() + Offset;
 }
+
+Expected<const void *>
+utils::elf::getSymbolAddress(const ELFSymbolRef &SymRef) {
+  const ObjectFile *Obj = SymRef.getObject();
+  if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+    return getSymbolAddressImpl(*ELFObj, SymRef);
+  if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+    return getSymbolAddressImpl(*ELFObj, SymRef);
+  return createError("Only 64-bit ELF files are supported");
+}
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index f85a00c..b862bc7 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -1166,7 +1166,7 @@ private:
 
     // Search for all symbols that contain a constructor or destructor.
     SmallVector<std::pair<StringRef, uint16_t>> Funcs;
-    for (ELFSymbolRef Sym : ELFObjOrErr->symbols()) {
+    for (ELFSymbolRef Sym : (*ELFObjOrErr)->symbols()) {
       auto NameOrErr = Sym.getName();
       if (!NameOrErr)
         return NameOrErr.takeError();
diff --git a/openmp/libomptarget/plugins-nextgen/ppc64/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/ppc64/CMakeLists.txt
index 0cccc9c..77466c1 100644
--- a/openmp/libomptarget/plugins-nextgen/ppc64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/ppc64/CMakeLists.txt
@@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
+  build_generic_elf64("ppc64" "PPC64" "ppc64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
 else()
  libomptarget_say("Not building ppc64 NextGen offloading plugin: machine not found in the system.")
 endif()
diff --git a/openmp/libomptarget/plugins-nextgen/ppc64le/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/ppc64le/CMakeLists.txt
index 9461d79..91d2162 100644
--- a/openmp/libomptarget/plugins-nextgen/ppc64le/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/ppc64le/CMakeLists.txt
@@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
+  build_generic_elf64("ppc64le" "PPC64le" "ppc64" "ppc64le" "powerpc64le-ibm-linux-gnu" "21")
 else()
  libomptarget_say("Not building ppc64le NextGen offloading plugin: machine not found in the system.")
 endif()
diff --git a/openmp/libomptarget/plugins-nextgen/s390x/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/s390x/CMakeLists.txt
index 1b12a29..0388a23 100644
--- a/openmp/libomptarget/plugins-nextgen/s390x/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/s390x/CMakeLists.txt
@@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
- build_generic_elf64("SystemZ" "S390X" "s390x" "s390x-ibm-linux-gnu" "22")
+ build_generic_elf64("s390x" "S390X" "s390x" "systemz" "s390x-ibm-linux-gnu" "22")
 else()
  libomptarget_say("Not building s390x NextGen offloading plugin: machine not found in the system.")
 endif()
diff --git a/openmp/libomptarget/plugins-nextgen/x86_64/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/x86_64/CMakeLists.txt
index 129d526..27cf3e0 100644
--- a/openmp/libomptarget/plugins-nextgen/x86_64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/x86_64/CMakeLists.txt
@@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
+  build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
 else()
  libomptarget_say("Not building x86_64 NextGen offloading plugin: machine not found in the system.")
 endif()
diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
index 9bc3f333..d0971bd 100644
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -73,6 +73,7 @@ if (NOT LIBOMPTARGET_PLUGINS_TO_LOAD)
 	check_plugin_target(cuda)
 	check_plugin_target(aarch64)
 	check_plugin_target(amdgpu)
+	check_plugin_target(s390x)
 endif()
 
 list(TRANSFORM LIBOMPTARGET_PLUGINS_TO_LOAD PREPEND "\"libomptarget.rtl.")
diff --git a/openmp/libomptarget/src/OpenMP/Mapping.cpp b/openmp/libomptarget/src/OpenMP/Mapping.cpp
index 9c0b219..6157626 100644
--- a/openmp/libomptarget/src/OpenMP/Mapping.cpp
+++ b/openmp/libomptarget/src/OpenMP/Mapping.cpp
@@ -511,7 +511,8 @@ static void printCopyInfoImpl(int DeviceId, bool H2D, void *SrcPtrBegin,
        "Copying data from %s to %s, %sPtr=" DPxMOD ", %sPtr=" DPxMOD
        ", Size=%" PRId64 ", Name=%s\n",
        H2D ? "host" : "device", H2D ? "device" : "host", H2D ? "Hst" : "Tgt",
-       DPxPTR(SrcPtrBegin), H2D ? "Tgt" : "Hst", DPxPTR(DstPtrBegin), Size,
+       DPxPTR(H2D ? SrcPtrBegin : DstPtrBegin), H2D ? "Tgt" : "Hst",
+       DPxPTR(H2D ? DstPtrBegin : SrcPtrBegin), Size,
        (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
                               : "unknown");
 }
diff --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
index 5095a69..3fe75f2 100644
--- a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
+++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
@@ -10,6 +10,8 @@
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
 // UNSUPPORTED: aarch64-unknown-linux-gnu
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/openmp/libomptarget/test/jit/empty_kernel_lvl1.c b/openmp/libomptarget/test/jit/empty_kernel_lvl1.c
index c908c8b..a0b8cd4 100644
--- a/openmp/libomptarget/test/jit/empty_kernel_lvl1.c
+++ b/openmp/libomptarget/test/jit/empty_kernel_lvl1.c
@@ -32,5 +32,7 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include "empty_kernel.inc"
diff --git a/openmp/libomptarget/test/jit/empty_kernel_lvl2.c b/openmp/libomptarget/test/jit/empty_kernel_lvl2.c
index 0b88d33..81a04f5 100644
--- a/openmp/libomptarget/test/jit/empty_kernel_lvl2.c
+++ b/openmp/libomptarget/test/jit/empty_kernel_lvl2.c
@@ -92,5 +92,7 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include "empty_kernel.inc"
diff --git a/openmp/libomptarget/test/jit/type_punning.c b/openmp/libomptarget/test/jit/type_punning.c
index 23aa69b..10e3d2c 100644
--- a/openmp/libomptarget/test/jit/type_punning.c
+++ b/openmp/libomptarget/test/jit/type_punning.c
@@ -12,6 +12,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 // Ensure that there is only the kernel function left, not any outlined
 // parallel regions.
diff --git a/openmp/libomptarget/test/mapping/auto_zero_copy.cpp b/openmp/libomptarget/test/mapping/auto_zero_copy.cpp
index 6f9d8c2..4664120 100644
--- a/openmp/libomptarget/test/mapping/auto_zero_copy.cpp
+++ b/openmp/libomptarget/test/mapping/auto_zero_copy.cpp
@@ -13,6 +13,8 @@
 // UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 // REQUIRES: unified_shared_memory
 
diff --git a/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp b/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp
index 4a13d27..55dfb28 100644
--- a/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp
+++ b/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 // REQUIRES: unified_shared_memory
 
diff --git a/openmp/libomptarget/test/offloading/barrier_fence.c b/openmp/libomptarget/test/offloading/barrier_fence.c
index 3eeaac5..9c3027f 100644
--- a/openmp/libomptarget/test/offloading/barrier_fence.c
+++ b/openmp/libomptarget/test/offloading/barrier_fence.c
@@ -11,6 +11,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/openmp/libomptarget/test/offloading/bug49334.cpp b/openmp/libomptarget/test/offloading/bug49334.cpp
index a22d3fe..1f19dab 100644
--- a/openmp/libomptarget/test/offloading/bug49334.cpp
+++ b/openmp/libomptarget/test/offloading/bug49334.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
 // UNSUPPORTED: aarch64-unknown-linux-gnu
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 // UNSUPPORTED: amdgcn-amd-amdhsa
 // UNSUPPORTED: nvptx64-nvidia-cuda
 // UNSUPPORTED: nvptx64-nvidia-cuda-LTO
diff --git a/openmp/libomptarget/test/offloading/default_thread_limit.c b/openmp/libomptarget/test/offloading/default_thread_limit.c
index d32e7df..4da02bb 100644
--- a/openmp/libomptarget/test/offloading/default_thread_limit.c
+++ b/openmp/libomptarget/test/offloading/default_thread_limit.c
@@ -9,6 +9,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 __attribute__((optnone)) int optnone() { return 1; }
 
diff --git a/openmp/libomptarget/test/offloading/ompx_bare.c b/openmp/libomptarget/test/offloading/ompx_bare.c
index fb3810b..3dabdcd 100644
--- a/openmp/libomptarget/test/offloading/ompx_bare.c
+++ b/openmp/libomptarget/test/offloading/ompx_bare.c
@@ -1,10 +1,13 @@
 // RUN: %libomptarget-compile-generic
-// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic
+// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | \
+// RUN:   %fcheck-generic
 //
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
 // UNSUPPORTED: aarch64-unknown-linux-gnu
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <assert.h>
 #include <ompx.h>
diff --git a/openmp/libomptarget/test/offloading/ompx_coords.c b/openmp/libomptarget/test/offloading/ompx_coords.c
index 61dad61..5e4e14b 100644
--- a/openmp/libomptarget/test/offloading/ompx_coords.c
+++ b/openmp/libomptarget/test/offloading/ompx_coords.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
 // UNSUPPORTED: aarch64-unknown-linux-gnu
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <omp.h>
 #include <ompx.h>
diff --git a/openmp/libomptarget/test/offloading/ompx_saxpy_mixed.c b/openmp/libomptarget/test/offloading/ompx_saxpy_mixed.c
index 440b694..f479be8 100644
--- a/openmp/libomptarget/test/offloading/ompx_saxpy_mixed.c
+++ b/openmp/libomptarget/test/offloading/ompx_saxpy_mixed.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
 // UNSUPPORTED: aarch64-unknown-linux-gnu
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <math.h>
 #include <omp.h>
diff --git a/openmp/libomptarget/test/offloading/parallel_target_teams_reduction.cpp b/openmp/libomptarget/test/offloading/parallel_target_teams_reduction.cpp
index 5303a94..10e1b33 100644
--- a/openmp/libomptarget/test/offloading/parallel_target_teams_reduction.cpp
+++ b/openmp/libomptarget/test/offloading/parallel_target_teams_reduction.cpp
@@ -6,6 +6,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <iostream>
 #include <vector>
diff --git a/openmp/libomptarget/test/offloading/small_trip_count.c b/openmp/libomptarget/test/offloading/small_trip_count.c
index d8bef66..65f094f 100644
--- a/openmp/libomptarget/test/offloading/small_trip_count.c
+++ b/openmp/libomptarget/test/offloading/small_trip_count.c
@@ -9,6 +9,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #define N 128
 
diff --git a/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp b/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp
index 9796c2d..b7ae52a 100644
--- a/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp
+++ b/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp
@@ -7,6 +7,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 int main(int argc, char *argv[]) {
   constexpr const int block_size = 256;
diff --git a/openmp/libomptarget/test/offloading/spmdization.c b/openmp/libomptarget/test/offloading/spmdization.c
index 2cf30ff..77913be 100644
--- a/openmp/libomptarget/test/offloading/spmdization.c
+++ b/openmp/libomptarget/test/offloading/spmdization.c
@@ -11,6 +11,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/openmp/libomptarget/test/offloading/target_critical_region.cpp b/openmp/libomptarget/test/offloading/target_critical_region.cpp
index 9a741be..495632b 100644
--- a/openmp/libomptarget/test/offloading/target_critical_region.cpp
+++ b/openmp/libomptarget/test/offloading/target_critical_region.cpp
@@ -6,6 +6,8 @@
 // UNSUPPORTED: nvptx64-nvidia-cuda-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 // UNSUPPORTED: amdgcn-amd-amdhsa
 
 #include <omp.h>
diff --git a/openmp/libomptarget/test/offloading/thread_limit.c b/openmp/libomptarget/test/offloading/thread_limit.c
index 65275a8..a8cc51b 100644
--- a/openmp/libomptarget/test/offloading/thread_limit.c
+++ b/openmp/libomptarget/test/offloading/thread_limit.c
@@ -9,6 +9,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 int main() {
   int n = 1 << 20;
diff --git a/openmp/libomptarget/test/ompt/target_memcpy.c b/openmp/libomptarget/test/ompt/target_memcpy.c
index 80a8d6a..3224c5a 100644
--- a/openmp/libomptarget/test/ompt/target_memcpy.c
+++ b/openmp/libomptarget/test/ompt/target_memcpy.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Verify that for the target OpenMP APIs, the return address is non-null and
diff --git a/openmp/libomptarget/test/ompt/target_memcpy_emi.c b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
index 5347f38..fd7da13 100644
--- a/openmp/libomptarget/test/ompt/target_memcpy_emi.c
+++ b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Verify all three data transfer directions: H2D, D2D and D2H
diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c
index 80e71fd..bc70f5a 100644
--- a/openmp/libomptarget/test/ompt/veccopy.c
+++ b/openmp/libomptarget/test/ompt/veccopy.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Example OpenMP program that registers non-EMI callbacks
diff --git a/openmp/libomptarget/test/ompt/veccopy_data.c b/openmp/libomptarget/test/ompt/veccopy_data.c
index cef1de3..264b484 100644
--- a/openmp/libomptarget/test/ompt/veccopy_data.c
+++ b/openmp/libomptarget/test/ompt/veccopy_data.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Example OpenMP program that registers EMI callbacks.
diff --git a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c
index 06293e4..a011c21 100644
--- a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c
+++ b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Example OpenMP program that shows that both EMI and non-EMI
diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c
index b597d7b..8718a39 100644
--- a/openmp/libomptarget/test/ompt/veccopy_emi.c
+++ b/openmp/libomptarget/test/ompt/veccopy_emi.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Example OpenMP program that registers EMI callbacks
diff --git a/openmp/libomptarget/test/ompt/veccopy_emi_map.c b/openmp/libomptarget/test/ompt/veccopy_emi_map.c
index ce6f6e3..2accba3 100644
--- a/openmp/libomptarget/test/ompt/veccopy_emi_map.c
+++ b/openmp/libomptarget/test/ompt/veccopy_emi_map.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Example OpenMP program that shows that map-EMI callbacks are not supported.
diff --git a/openmp/libomptarget/test/ompt/veccopy_map.c b/openmp/libomptarget/test/ompt/veccopy_map.c
index 83f63a6..56a0dd4 100644
--- a/openmp/libomptarget/test/ompt/veccopy_map.c
+++ b/openmp/libomptarget/test/ompt/veccopy_map.c
@@ -4,6 +4,8 @@
 // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
 // UNSUPPORTED: x86_64-pc-linux-gnu
 // UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
 /*
  * Example OpenMP program that shows that map callbacks are not supported.
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 3f831d6..d751a41 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -2610,6 +2610,43 @@ finish: // Clean up and exit.
       KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||            \
       KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF)
 
+// Because WebAssembly will use `call_indirect` to invoke the microtask and
+// WebAssembly indirect calls check that the called signature is a precise
+// match, we need to cast each microtask function pointer back from `void *` to
+// its original type.
+typedef void (*microtask_t0)(int *, int *);
+typedef void (*microtask_t1)(int *, int *, void *);
+typedef void (*microtask_t2)(int *, int *, void *, void *);
+typedef void (*microtask_t3)(int *, int *, void *, void *, void *);
+typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *);
+typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *,
+                             void *);
+typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *,
+                             void *, void *);
+typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *,
+                             void *, void *, void *);
+typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *,
+                             void *, void *, void *, void *);
+typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *,
+                             void *, void *, void *, void *, void *);
+typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *, void *);
+typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *, void *,
+                              void *);
+typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *, void *,
+                              void *, void *);
+typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *, void *,
+                              void *, void *, void *);
+typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *, void *,
+                              void *, void *, void *, void *);
+typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *, void *,
+                              void *, void *, void *, void *, void *);
+
 // we really only need the case with 1 argument, because CLANG always build
 // a struct of pointers to shared variables referenced in the outlined function
 int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
@@ -2629,66 +2666,76 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
     fflush(stderr);
     exit(-1);
   case 0:
-    (*pkfn)(&gtid, &tid);
+    (*(microtask_t0)pkfn)(&gtid, &tid);
     break;
   case 1:
-    (*pkfn)(&gtid, &tid, p_argv[0]);
+    (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]);
     break;
   case 2:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
+    (*(microtask_t2)pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
     break;
   case 3:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
+    (*(microtask_t3)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
     break;
   case 4:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
+    (*(microtask_t4)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                          p_argv[3]);
     break;
   case 5:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
+    (*(microtask_t5)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                          p_argv[3], p_argv[4]);
     break;
   case 6:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5]);
+    (*(microtask_t6)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                          p_argv[3], p_argv[4], p_argv[5]);
     break;
   case 7:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6]);
+    (*(microtask_t7)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                          p_argv[3], p_argv[4], p_argv[5], p_argv[6]);
     break;
   case 8:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7]);
+    (*(microtask_t8)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                          p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                          p_argv[7]);
     break;
   case 9:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
+    (*(microtask_t9)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                          p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7],
+                          p_argv[8]);
     break;
   case 10:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
+    (*(microtask_t10)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                           p_argv[7], p_argv[8], p_argv[9]);
     break;
   case 11:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
+    (*(microtask_t11)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                           p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
     break;
   case 12:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11]);
+    (*(microtask_t12)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+                           p_argv[11]);
     break;
   case 13:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11], p_argv[12]);
+    (*(microtask_t13)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+                           p_argv[11], p_argv[12]);
     break;
   case 14:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11], p_argv[12], p_argv[13]);
+    (*(microtask_t14)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+                           p_argv[11], p_argv[12], p_argv[13]);
     break;
   case 15:
-    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
-            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
-            p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
+    (*(microtask_t15)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
+                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
+                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+                           p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
     break;
   }
 
diff --git a/utils/bazel/llvm-project-overlay/clang-tools-extra/clangd/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang-tools-extra/clangd/BUILD.bazel
new file mode 100644
index 0000000..f2aa64b
--- /dev/null
+++ b/utils/bazel/llvm-project-overlay/clang-tools-extra/clangd/BUILD.bazel
@@ -0,0 +1,71 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+)
+
+licenses(["notice"])
+
+# TODO: this is a shim to provide Features.inc as needed by Feature.h.
+# Replace this with something that parses Features.inc.in.
+genrule(
+    name = "gen_features_inc",
+    outs = ["Features.inc"],
+    cmd = "\n".join([
+        "echo '// IWYU pragma: private, include \"Feature.h\"' >> $@",
+        "echo '#define CLANGD_BUILD_XPC 0' >> $@",
+        "echo '#define CLANGD_ENABLE_REMOTE 1' >> $@",
+        "echo '#define ENABLE_GRPC_REFLECTION 0' >> $@",
+        "echo '#define CLANGD_MALLOC_TRIM 0' >> $@",
+        "echo '#define CLANGD_TIDY_CHECKS 1' >> $@",
+        "echo '#define CLANGD_DECISION_FOREST 1' >> $@",
+    ]),
+)
+
+# TODO: Pick up other files for more complete functionality, to match
+# clangd/CMakeLists.txt. This might look something like
+# glob(["*.cpp", "dir/**/*.cpp", ...]).
+cc_library(
+    name = "ClangDaemon",
+    srcs = [
+        "Feature.cpp",
+        "Features.inc",
+        "JSONTransport.cpp",
+        "Protocol.cpp",
+        "URI.cpp",
+        "index/SymbolID.cpp",
+        "support/Cancellation.cpp",
+        "support/Context.cpp",
+        "support/Logger.cpp",
+        "support/MemoryTree.cpp",
+        "support/Shutdown.cpp",
+        "support/ThreadCrashReporter.cpp",
+        "support/Trace.cpp",
+    ],
+    hdrs = [
+        "Feature.h",
+        "LSPBinder.h",
+        "Protocol.h",
+        "Transport.h",
+        "URI.h",
+        "index/SymbolID.h",
+        "support/Cancellation.h",
+        "support/Context.h",
+        "support/Function.h",
+        "support/Logger.h",
+        "support/MemoryTree.h",
+        "support/Shutdown.h",
+        "support/ThreadCrashReporter.h",
+        "support/Trace.h",
+    ],
+    includes = ["."],
+    deps = [
+        "//clang:basic",
+        "//clang:index",
+        "//llvm:Support",
+        "//llvm:TargetParser",
+    ],
+)
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
index 5434761..7c8ad71 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
@@ -64,8 +64,8 @@ libc_test(
     name = "integer_to_string_test",
     srcs = ["integer_to_string_test.cpp"],
     deps = [
-        "//libc:__support_cpp_span",
         "//libc:__support_cpp_limits",
+        "//libc:__support_cpp_span",
         "//libc:__support_cpp_string_view",
         "//libc:__support_integer_literals",
         "//libc:__support_integer_to_string",
diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel
index 99b9246..c8863af 100644
--- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel
@@ -312,6 +312,7 @@ cc_test(
         "//llvm:AllTargetsAsmParsers",
         "//llvm:AllTargetsCodeGens",
         "//llvm:AsmParser",
+        "//llvm:BinaryFormat",
         "//llvm:Core",
         "//llvm:ExecutionEngine",
         "//llvm:IRReader",
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 28a69c7..c14869f 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1773,8 +1773,6 @@ cc_library(
         ":EmitCDialect",
         ":FuncDialect",
         ":IR",
-        ":MathDialect",
-        ":SCFDialect",
         ":Support",
         ":TranslateLib",
         "//llvm:Support",
@@ -1938,15 +1936,15 @@ cc_library(
     includes = ["include"],
     deps = [
         ":ArithDialect",
-        ":ArmNeonIncGen",
         ":ArmNeonDialect",
+        ":ArmNeonIncGen",
         ":FuncDialect",
         ":IR",
         ":LLVMDialect",
         ":SideEffectInterfaces",
         ":Support",
-        ":VectorDialect",
         ":Transforms",
+        ":VectorDialect",
         "//llvm:Core",
         "//llvm:Support",
     ],
@@ -5290,6 +5288,14 @@ gentbl_cc_library(
             "include/mlir/Dialect/LLVMIR/LLVMInterfaces.cpp.inc",
         ),
         (
+            ["-gen-attr-interface-decls"],
+            "include/mlir/Dialect/LLVMIR/LLVMAttrInterfaces.h.inc",
+        ),
+        (
+            ["-gen-attr-interface-defs"],
+            "include/mlir/Dialect/LLVMIR/LLVMAttrInterfaces.cpp.inc",
+        ),
+        (
             ["-gen-type-interface-decls"],
             "include/mlir/Dialect/LLVMIR/LLVMTypeInterfaces.h.inc",
         ),
@@ -10964,9 +10970,9 @@ cc_library(
         ":LinalgStructuredOpsIncGen",
         ":MathDialect",
         ":MemRefDialect",
+        ":MeshShardingInterface",
         ":Parser",
         ":SCFDialect",
-        ":MeshShardingInterface",
         ":SideEffectInterfaces",
         ":SparseTensorDialect",
         ":Support",
@@ -11121,12 +11127,12 @@ cc_library(
         ":MemRefDialect",
         ":MemRefTransforms",
         ":MeshDialect",
+        ":MeshShardingInterface",
         ":MeshTransforms",
         ":Pass",
         ":SCFDialect",
         ":SCFTransforms",
         ":SCFUtils",
-        ":MeshShardingInterface",
         ":SparseTensorDialect",
         ":SubsetOpInterface",
         ":Support",
diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
index a749a11..252b9ec 100644
--- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
@@ -61,8 +61,8 @@ cc_test(
     deps = [
         "//llvm:Support",
         "//llvm:TestingSupport",
-        "//mlir:BytecodeReader",
         "//mlir:ArithDialect",
+        "//mlir:BytecodeReader",
         "//mlir:ControlFlowInterfaces",
         "//mlir:DLTIDialect",
         "//mlir:DataLayoutInterfaces",
author	Slava Zakharin <szakharin@nvidia.com>	2024-03-15 14:27:05 -0700
committer	Slava Zakharin <szakharin@nvidia.com>	2024-03-15 14:27:05 -0700
commit	200c194c17b372e69b17bcec9a65c1bdea0965e1 (patch)
tree	f93a7811bb8abfa9be9d80fc89d6844bf6e6fed0
parent	d0a43d8ad55dcd075591bf9da3585a203acc4ad0 (diff)
parent	71e0261fb0c6c382f68eedddf6bbcf637e6709f2 (diff)
download	llvm-200c194c17b372e69b17bcec9a65c1bdea0965e1.zip llvm-200c194c17b372e69b17bcec9a65c1bdea0965e1.tar.gz llvm-200c194c17b372e69b17bcec9a65c1bdea0965e1.tar.bz2