121 files changed, 2790 insertions, 1108 deletions
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index dad6717..a0e7995 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1514,6 +1514,12 @@ void RewriteInstance::registerFragments() {
       }
       if (BD) {
         BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
+        if (BF == &Function) {
+          BC->errs()
+              << "BOLT-WARNING: fragment maps to the same function as parent: "
+              << Function << '\n';
+          continue;
+        }
         if (BF) {
           BC->registerFragment(Function, *BF);
           continue;
diff --git a/bolt/test/X86/fragment-alias.s b/bolt/test/X86/fragment-alias.s
new file mode 100644
index 0000000..3392dd5
--- /dev/null
+++ b/bolt/test/X86/fragment-alias.s
@@ -0,0 +1,13 @@
+## This test reproduces the issue where a fragment has the same address as
+## parent function.
+# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t
+# RUN: llvm-bolt %t -o %t.out 2>&1 | FileCheck %s
+# CHECK: BOLT-WARNING: fragment maps to the same function as parent: main/1(*2)
+.type main, @function
+.type main.cold, @function
+main.cold:
+main:
+  ret
+.size main, .-main
+.size main.cold, .-main.cold
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 138544d..acc8e87 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -1537,6 +1537,12 @@ markup::Document HoverInfo::presentDoxygen() const {
   SymbolDocCommentVisitor SymbolDoc(Documentation, CommentOpts);
 
   if (SymbolDoc.hasBriefCommand()) {
+    if (Kind != index::SymbolKind::Parameter &&
+        Kind != index::SymbolKind::TemplateTypeParm)
+      // Only add a "Brief" heading if we are not documenting a parameter.
+      // Parameters only have a brief section and adding the brief header would
+      // be redundant.
+      Output.addHeading(3).appendText("Brief");
     SymbolDoc.briefToMarkup(Output.addParagraph());
     Output.addRuler();
   }
@@ -1550,7 +1556,7 @@ markup::Document HoverInfo::presentDoxygen() const {
   // Returns
   // `type` - description
   if (TemplateParameters && !TemplateParameters->empty()) {
-    Output.addParagraph().appendBoldText("Template Parameters:");
+    Output.addHeading(3).appendText("Template Parameters");
     markup::BulletList &L = Output.addBulletList();
     for (const auto &Param : *TemplateParameters) {
       markup::Paragraph &P = L.addItem().addParagraph();
@@ -1564,7 +1570,7 @@ markup::Document HoverInfo::presentDoxygen() const {
   }
 
   if (Parameters && !Parameters->empty()) {
-    Output.addParagraph().appendBoldText("Parameters:");
+    Output.addHeading(3).appendText("Parameters");
     markup::BulletList &L = Output.addBulletList();
     for (const auto &Param : *Parameters) {
       markup::Paragraph &P = L.addItem().addParagraph();
@@ -1583,7 +1589,7 @@ markup::Document HoverInfo::presentDoxygen() const {
   if (ReturnType &&
       ((ReturnType->Type != "void" && !ReturnType->AKA.has_value()) ||
        (ReturnType->AKA.has_value() && ReturnType->AKA != "void"))) {
-    Output.addParagraph().appendBoldText("Returns:");
+    Output.addHeading(3).appendText("Returns");
     markup::Paragraph &P = Output.addParagraph();
     P.appendCode(llvm::to_string(*ReturnType));
 
@@ -1591,15 +1597,15 @@ markup::Document HoverInfo::presentDoxygen() const {
       P.appendText(" - ");
       SymbolDoc.returnToMarkup(P);
     }
+
+    SymbolDoc.retvalsToMarkup(Output);
     Output.addRuler();
   }
 
-  // add specially handled doxygen commands.
-  SymbolDoc.warningsToMarkup(Output);
-  SymbolDoc.notesToMarkup(Output);
-
-  // add any other documentation.
-  SymbolDoc.docToMarkup(Output);
+  if (SymbolDoc.hasDetailedDoc()) {
+    Output.addHeading(3).appendText("Details");
+    SymbolDoc.detailedDocToMarkup(Output);
+  }
 
   Output.addRuler();
 
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.cpp b/clang-tools-extra/clangd/SymbolDocumentation.cpp
index 9ae1ef3..a50d7a5 100644
--- a/clang-tools-extra/clangd/SymbolDocumentation.cpp
+++ b/clang-tools-extra/clangd/SymbolDocumentation.cpp
@@ -13,6 +13,7 @@
 #include "clang/AST/CommentCommandTraits.h"
 #include "clang/AST/CommentVisitor.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace clang {
@@ -34,10 +35,20 @@ void commandToMarkup(markup::Paragraph &Out, StringRef Command,
                      StringRef Args) {
   Out.appendBoldText(commandMarkerAsString(CommandMarker) + Command.str());
   Out.appendSpace();
-  if (!Args.empty()) {
-    Out.appendEmphasizedText(Args.str());
+  if (!Args.empty())
+    Out.appendCode(Args.str());
+}
+
+template <typename T> std::string getArgText(const T *Command) {
+  std::string ArgText;
+  for (unsigned I = 0; I < Command->getNumArgs(); ++I) {
+    if (!ArgText.empty())
+      ArgText += " ";
+    ArgText += Command->getArgText(I);
   }
+  return ArgText;
 }
+
 } // namespace
 
 class ParagraphToMarkupDocument
@@ -70,12 +81,7 @@ public:
   void visitInlineCommandComment(const comments::InlineCommandComment *C) {
 
     if (C->getNumArgs() > 0) {
-      std::string ArgText;
-      for (unsigned I = 0; I < C->getNumArgs(); ++I) {
-        if (!ArgText.empty())
-          ArgText += " ";
-        ArgText += C->getArgText(I);
-      }
+      std::string ArgText = getArgText(C);
 
       switch (C->getRenderKind()) {
       case comments::InlineCommandRenderKind::Monospaced:
@@ -158,10 +164,9 @@ public:
   void visitInlineCommandComment(const comments::InlineCommandComment *C) {
     Out << commandMarkerAsString(C->getCommandMarker());
     Out << C->getCommandName(Traits);
-    if (C->getNumArgs() > 0) {
-      for (unsigned I = 0; I < C->getNumArgs(); ++I)
-        Out << " " << C->getArgText(I);
-    }
+    std::string ArgText = getArgText(C);
+    if (!ArgText.empty())
+      Out << " " << ArgText;
     Out << " ";
   }
 
@@ -210,16 +215,38 @@ public:
                                 Traits)
           .visit(B->getParagraph());
       break;
+    case comments::CommandTraits::KCI_note:
+    case comments::CommandTraits::KCI_warning:
+      commandToHeadedParagraph(B);
+      break;
+    case comments::CommandTraits::KCI_retval: {
+      // The \retval command describes the return value given as its single
+      // argument in the corresponding paragraph.
+      // Note: We know that we have exactly one argument but not if it has an
+      // associated paragraph.
+      auto &P = Out.addParagraph().appendCode(getArgText(B));
+      if (B->getParagraph() && !B->getParagraph()->isWhitespace()) {
+        P.appendText(" - ");
+        ParagraphToMarkupDocument(P, Traits).visit(B->getParagraph());
+      }
+      return;
+    }
+    case comments::CommandTraits::KCI_details: {
+      // The \details command is just used to separate the brief from the
+      // detailed description. This separation is already done in the
+      // SymbolDocCommentVisitor. Therefore we can omit the command itself
+      // here and just process the paragraph.
+      if (B->getParagraph() && !B->getParagraph()->isWhitespace()) {
+        ParagraphToMarkupDocument(Out.addParagraph(), Traits)
+            .visit(B->getParagraph());
+      }
+      return;
+    }
     default: {
       // Some commands have arguments, like \throws.
       // The arguments are not part of the paragraph.
       // We need reconstruct them here.
-      std::string ArgText;
-      for (unsigned I = 0; I < B->getNumArgs(); ++I) {
-        if (!ArgText.empty())
-          ArgText += " ";
-        ArgText += B->getArgText(I);
-      }
+      std::string ArgText = getArgText(B);
       auto &P = Out.addParagraph();
       commandToMarkup(P, B->getCommandName(Traits), B->getCommandMarker(),
                       ArgText);
@@ -234,7 +261,53 @@ public:
     }
   }
 
+  void visitCodeCommand(const comments::VerbatimBlockComment *VB) {
+    std::string CodeLang = "";
+    auto *FirstLine = VB->child_begin();
+    // The \\code command has an optional language argument.
+    // This argument is currently not parsed by the clang doxygen parser.
+    // Therefore we try to extract it from the first line of the verbatim
+    // block.
+    if (VB->getNumLines() > 0) {
+      if (const auto *Line =
+              cast<comments::VerbatimBlockLineComment>(*FirstLine)) {
+        llvm::StringRef Text = Line->getText();
+        // Language is a single word enclosed in {}.
+        if (llvm::none_of(Text, llvm::isSpace) && Text.consume_front("{") &&
+            Text.consume_back("}")) {
+          // drop a potential . since this is not supported in Markdown
+          // fenced code blocks.
+          Text.consume_front(".");
+          // Language is alphanumeric or '+'.
+          CodeLang = Text.take_while([](char C) {
+                           return llvm::isAlnum(C) || C == '+';
+                         })
+                         .str();
+          // Skip the first line for the verbatim text.
+          ++FirstLine;
+        }
+      }
+    }
+
+    std::string CodeBlockText;
+
+    for (const auto *LI = FirstLine; LI != VB->child_end(); ++LI) {
+      if (const auto *Line = cast<comments::VerbatimBlockLineComment>(*LI)) {
+        CodeBlockText += Line->getText().str() + "\n";
+      }
+    }
+
+    Out.addCodeBlock(CodeBlockText, CodeLang);
+  }
+
   void visitVerbatimBlockComment(const comments::VerbatimBlockComment *VB) {
+    // The \\code command is a special verbatim block command which we handle
+    // separately.
+    if (VB->getCommandID() == comments::CommandTraits::KCI_code) {
+      visitCodeCommand(VB);
+      return;
+    }
+
     commandToMarkup(Out.addParagraph(), VB->getCommandName(Traits),
                     VB->getCommandMarker(), "");
 
@@ -262,8 +335,123 @@ private:
   markup::Document &Out;
   const comments::CommandTraits &Traits;
   StringRef CommentEscapeMarker;
+
+  /// Emphasize the given command in a paragraph.
+  /// Uses the command name with the first letter capitalized as the heading.
+  void commandToHeadedParagraph(const comments::BlockCommandComment *B) {
+    auto &P = Out.addParagraph();
+    std::string Heading = B->getCommandName(Traits).slice(0, 1).upper() +
+                          B->getCommandName(Traits).drop_front().str();
+    P.appendBoldText(Heading + ":");
+    P.appendText("  \n");
+    ParagraphToMarkupDocument(P, Traits).visit(B->getParagraph());
+  }
 };
 
+void SymbolDocCommentVisitor::preprocessDocumentation(StringRef Doc) {
+  enum State {
+    Normal,
+    FencedCodeblock,
+  } State = Normal;
+  std::string CodeFence;
+
+  llvm::raw_string_ostream OS(CommentWithMarkers);
+
+  // The documentation string is processed line by line.
+  // The raw documentation string does not contain the comment markers
+  // (e.g. /// or /** */).
+  // But the comment lexer expects doxygen markers, so add them back.
+  // We need to use the /// style doxygen markers because the comment could
+  // contain the closing tag "*/" of a C Style "/** */" comment
+  // which would break the parsing if we would just enclose the comment text
+  // with "/** */".
+
+  // Escape doxygen commands inside markdown inline code spans.
+  // This is required to not let the doxygen parser interpret them as
+  // commands.
+  // Note: This is a heuristic which may fail in some cases.
+  bool InCodeSpan = false;
+
+  llvm::StringRef Line, Rest;
+  for (std::tie(Line, Rest) = Doc.split('\n'); !(Line.empty() && Rest.empty());
+       std::tie(Line, Rest) = Rest.split('\n')) {
+
+    // Detect code fence (``` or ~~~)
+    if (State == Normal) {
+      llvm::StringRef Trimmed = Line.ltrim();
+      if (Trimmed.starts_with("```") || Trimmed.starts_with("~~~")) {
+        // https://www.doxygen.nl/manual/markdown.html#md_fenced
+        CodeFence =
+            Trimmed.take_while([](char C) { return C == '`' || C == '~'; })
+                .str();
+        // Try to detect language: first word after fence. Could also be
+        // enclosed in {}
+        llvm::StringRef AfterFence =
+            Trimmed.drop_front(CodeFence.size()).ltrim();
+        // ignore '{' at the beginning of the language name to not duplicate it
+        // for the doxygen command
+        AfterFence.consume_front("{");
+        // The name is alphanumeric or '.' or '+'
+        StringRef CodeLang = AfterFence.take_while(
+            [](char C) { return llvm::isAlnum(C) || C == '.' || C == '+'; });
+
+        OS << "///@code";
+
+        if (!CodeLang.empty())
+          OS << "{" << CodeLang.str() << "}";
+
+        OS << "\n";
+
+        State = FencedCodeblock;
+        continue;
+      }
+
+      // FIXME: handle indented code blocks too?
+      // In doxygen, the indentation which triggers a code block depends on the
+      // indentation of the previous paragraph.
+      // https://www.doxygen.nl/manual/markdown.html#mddox_code_blocks
+    } else if (State == FencedCodeblock) {
+      // End of code fence
+      if (Line.ltrim().starts_with(CodeFence)) {
+        OS << "///@endcode\n";
+        State = Normal;
+        continue;
+      }
+      OS << "///" << Line << "\n";
+      continue;
+    }
+
+    // Normal line preprocessing (add doxygen markers, handle escaping)
+    OS << "///";
+
+    if (Line.empty() || Line.trim().empty()) {
+      OS << "\n";
+      // Empty lines reset the InCodeSpan state.
+      InCodeSpan = false;
+      continue;
+    }
+
+    if (Line.starts_with("<"))
+      // A comment line starting with '///<' is treated as a doxygen
+      // command. To avoid this, we add a space before the '<'.
+      OS << ' ';
+
+    for (char C : Line) {
+      if (C == '`')
+        InCodeSpan = !InCodeSpan;
+      else if (InCodeSpan && (C == '@' || C == '\\'))
+        OS << '\\';
+      OS << C;
+    }
+
+    OS << "\n";
+  }
+
+  // Close any unclosed code block
+  if (State == FencedCodeblock)
+    OS << "///@endcode\n";
+}
+
 void SymbolDocCommentVisitor::visitBlockCommandComment(
     const comments::BlockCommandComment *B) {
   switch (B->getCommandID()) {
@@ -282,36 +470,22 @@ void SymbolDocCommentVisitor::visitBlockCommandComment(
     }
     break;
   case comments::CommandTraits::KCI_retval:
-    RetvalParagraphs.push_back(B->getParagraph());
-    return;
-  case comments::CommandTraits::KCI_warning:
-    WarningParagraphs.push_back(B->getParagraph());
-    return;
-  case comments::CommandTraits::KCI_note:
-    NoteParagraphs.push_back(B->getParagraph());
+    // Only consider retval commands having an argument.
+    // The argument contains the described return value which is needed to
+    // convert it to markup.
+    if (B->getNumArgs() == 1)
+      RetvalCommands.push_back(B);
     return;
   default:
     break;
   }
 
-  // For all other commands, we store them in the UnhandledCommands map.
+  // For all other commands, we store them in the BlockCommands map.
   // This allows us to keep the order of the comments.
-  UnhandledCommands[CommentPartIndex] = B;
+  BlockCommands[CommentPartIndex] = B;
   CommentPartIndex++;
 }
 
-void SymbolDocCommentVisitor::paragraphsToMarkup(
-    markup::Document &Out,
-    const llvm::SmallVectorImpl<const comments::ParagraphComment *> &Paragraphs)
-    const {
-  if (Paragraphs.empty())
-    return;
-
-  for (const auto *P : Paragraphs) {
-    ParagraphToMarkupDocument(Out.addParagraph(), Traits).visit(P);
-  }
-}
-
 void SymbolDocCommentVisitor::briefToMarkup(markup::Paragraph &Out) const {
   if (!BriefParagraph)
     return;
@@ -324,14 +498,6 @@ void SymbolDocCommentVisitor::returnToMarkup(markup::Paragraph &Out) const {
   ParagraphToMarkupDocument(Out, Traits).visit(ReturnParagraph);
 }
 
-void SymbolDocCommentVisitor::notesToMarkup(markup::Document &Out) const {
-  paragraphsToMarkup(Out, NoteParagraphs);
-}
-
-void SymbolDocCommentVisitor::warningsToMarkup(markup::Document &Out) const {
-  paragraphsToMarkup(Out, WarningParagraphs);
-}
-
 void SymbolDocCommentVisitor::parameterDocToMarkup(
     StringRef ParamName, markup::Paragraph &Out) const {
   if (ParamName.empty())
@@ -352,9 +518,9 @@ void SymbolDocCommentVisitor::parameterDocToString(
   }
 }
 
-void SymbolDocCommentVisitor::docToMarkup(markup::Document &Out) const {
+void SymbolDocCommentVisitor::detailedDocToMarkup(markup::Document &Out) const {
   for (unsigned I = 0; I < CommentPartIndex; ++I) {
-    if (const auto *BC = UnhandledCommands.lookup(I)) {
+    if (const auto *BC = BlockCommands.lookup(I)) {
       BlockCommentToMarkupDocument(Out, Traits).visit(BC);
     } else if (const auto *P = FreeParagraphs.lookup(I)) {
       ParagraphToMarkupDocument(Out.addParagraph(), Traits).visit(P);
@@ -382,5 +548,14 @@ void SymbolDocCommentVisitor::templateTypeParmDocToString(
   }
 }
 
+void SymbolDocCommentVisitor::retvalsToMarkup(markup::Document &Out) const {
+  if (RetvalCommands.empty())
+    return;
+  markup::BulletList &BL = Out.addBulletList();
+  for (const auto *P : RetvalCommands) {
+    BlockCommentToMarkupDocument(BL.addItem(), Traits).visit(P);
+  }
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.h b/clang-tools-extra/clangd/SymbolDocumentation.h
index b0d3428..88c7ade 100644
--- a/clang-tools-extra/clangd/SymbolDocumentation.h
+++ b/clang-tools-extra/clangd/SymbolDocumentation.h
@@ -21,6 +21,7 @@
 #include "clang/AST/CommentSema.h"
 #include "clang/AST/CommentVisitor.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
@@ -51,31 +52,8 @@ public:
     CommentWithMarkers.reserve(Documentation.size() +
                                Documentation.count('\n') * 3);
 
-    // The comment lexer expects doxygen markers, so add them back.
-    // We need to use the /// style doxygen markers because the comment could
-    // contain the closing the closing tag "*/" of a C Style "/** */" comment
-    // which would break the parsing if we would just enclose the comment text
-    // with "/** */".
-    CommentWithMarkers = "///";
-    bool NewLine = true;
-    for (char C : Documentation) {
-      if (C == '\n') {
-        CommentWithMarkers += "\n///";
-        NewLine = true;
-      } else {
-        if (NewLine && (C == '<')) {
-          // A comment line starting with '///<' is treated as a doxygen
-          // comment. Therefore add a space to separate the '<' from the comment
-          // marker. This allows to parse html tags at the beginning of a line
-          // and the escape marker prevents adding the artificial space in the
-          // markup documentation. The extra space will not be rendered, since
-          // we render it as markdown.
-          CommentWithMarkers += ' ';
-        }
-        CommentWithMarkers += C;
-        NewLine = false;
-      }
-    }
+    preprocessDocumentation(Documentation);
+
     SourceManagerForFile SourceMgrForFile("mock_file.cpp", CommentWithMarkers);
 
     SourceManager &SourceMgr = SourceMgrForFile.get();
@@ -100,6 +78,14 @@ public:
     for (auto *Block : FC->getBlocks()) {
       visit(Block);
     }
+
+    // If we have not seen a brief command, use the very first free paragraph as
+    // the brief.
+    if (!BriefParagraph && !FreeParagraphs.empty() &&
+        FreeParagraphs.contains(0)) {
+      BriefParagraph = FreeParagraphs.lookup(0);
+      FreeParagraphs.erase(0);
+    }
   }
 
   bool isParameterDocumented(StringRef ParamName) const {
@@ -114,22 +100,18 @@ public:
 
   bool hasReturnCommand() const { return ReturnParagraph; }
 
-  bool hasRetvalCommands() const { return !RetvalParagraphs.empty(); }
-
-  bool hasNoteCommands() const { return !NoteParagraphs.empty(); }
-
-  bool hasWarningCommands() const { return !WarningParagraphs.empty(); }
+  bool hasDetailedDoc() const {
+    return !FreeParagraphs.empty() || !BlockCommands.empty();
+  }
 
   /// Converts all unhandled comment commands to a markup document.
-  void docToMarkup(markup::Document &Out) const;
+  void detailedDocToMarkup(markup::Document &Out) const;
   /// Converts the "brief" command(s) to a markup document.
   void briefToMarkup(markup::Paragraph &Out) const;
   /// Converts the "return" command(s) to a markup document.
   void returnToMarkup(markup::Paragraph &Out) const;
-  /// Converts the "note" command(s) to a markup document.
-  void notesToMarkup(markup::Document &Out) const;
-  /// Converts the "warning" command(s) to a markup document.
-  void warningsToMarkup(markup::Document &Out) const;
+  /// Converts the "retval" command(s) to a markup document.
+  void retvalsToMarkup(markup::Document &Out) const;
 
   void visitBlockCommandComment(const comments::BlockCommandComment *B);
 
@@ -145,8 +127,10 @@ public:
                             llvm::raw_string_ostream &Out) const;
 
   void visitParagraphComment(const comments::ParagraphComment *P) {
-    FreeParagraphs[CommentPartIndex] = P;
-    CommentPartIndex++;
+    if (!P->isWhitespace()) {
+      FreeParagraphs[CommentPartIndex] = P;
+      CommentPartIndex++;
+    }
   }
 
   void visitParamCommandComment(const comments::ParamCommandComment *P) {
@@ -157,6 +141,27 @@ public:
     TemplateParameters[TP->getParamNameAsWritten()] = std::move(TP);
   }
 
+  /// \brief Preprocesses the raw documentation string to prepare it for doxygen
+  /// parsing.
+  ///
+  /// This is a workaround to provide better support for markdown in
+  /// doxygen. Clang's doxygen parser e.g. does not handle markdown code blocks.
+  ///
+  /// The documentation string is preprocessed to replace some markdown
+  /// constructs with parsable doxygen commands. E.g. markdown code blocks are
+  /// replaced with doxygen \\code{.lang} ...
+  /// \\endcode blocks.
+  ///
+  /// Additionally, potential doxygen commands inside markdown
+  /// inline code spans are escaped to avoid that doxygen tries to interpret
+  /// them as commands.
+  ///
+  /// \note Although this is a workaround, it is very similar to what
+  /// doxygen itself does for markdown. In doxygen, the first parsing step is
+  /// also a markdown preprocessing step.
+  /// See https://www.doxygen.nl/manual/markdown.html
+  void preprocessDocumentation(StringRef Doc);
+
 private:
   comments::CommandTraits Traits;
   llvm::BumpPtrAllocator Allocator;
@@ -173,19 +178,13 @@ private:
   /// Paragraph of the "return" command.
   const comments::ParagraphComment *ReturnParagraph = nullptr;
 
-  /// Paragraph(s) of the "note" command(s)
-  llvm::SmallVector<const comments::ParagraphComment *> RetvalParagraphs;
+  /// All the "retval" command(s)
+  llvm::SmallVector<const comments::BlockCommandComment *> RetvalCommands;
 
-  /// Paragraph(s) of the "note" command(s)
-  llvm::SmallVector<const comments::ParagraphComment *> NoteParagraphs;
-
-  /// Paragraph(s) of the "warning" command(s)
-  llvm::SmallVector<const comments::ParagraphComment *> WarningParagraphs;
-
-  /// All the paragraphs we don't have any special handling for,
-  /// e.g. "details".
+  /// All the parsed doxygen block commands.
+  /// They might have special handling internally like \\note or \\warning
   llvm::SmallDenseMap<unsigned, const comments::BlockCommandComment *>
-      UnhandledCommands;
+      BlockCommands;
 
   /// Parsed paragaph(s) of the "param" comamnd(s)
   llvm::SmallDenseMap<StringRef, const comments::ParamCommandComment *>
@@ -198,11 +197,6 @@ private:
   /// All "free" text paragraphs.
   llvm::SmallDenseMap<unsigned, const comments::ParagraphComment *>
       FreeParagraphs;
-
-  void paragraphsToMarkup(
-      markup::Document &Out,
-      const llvm::SmallVectorImpl<const comments::ParagraphComment *>
-          &Paragraphs) const;
 };
 
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 89bdc65..304917d 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -199,10 +199,16 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
   return needsLeadingEscapeMarkdown(C, After);
 }
 
-/// Escape a markdown text block.
+/// \brief Render text for markdown output.
+///
 /// If \p EscapeMarkdown is true it ensures the punctuation will not introduce
 /// any of the markdown constructs.
+///
 /// Else, markdown syntax is not escaped, only HTML tags and entities.
+/// HTML is escaped because usually clients do not support HTML rendering by
+/// default. Passing unescaped HTML will therefore often result in not showing
+/// the HTML at all.
+/// \note In markdown code spans, we do not escape anything.
 std::string renderText(llvm::StringRef Input, bool StartsLine,
                        bool EscapeMarkdown) {
   std::string R;
@@ -213,6 +219,10 @@ std::string renderText(llvm::StringRef Input, bool StartsLine,
 
   bool IsFirstLine = true;
 
+  // Inside markdown code spans, we do not escape anything when EscapeMarkdown
+  // is false.
+  bool InCodeSpan = false;
+
   for (std::tie(Line, Rest) = Input.split('\n');
        !(Line.empty() && Rest.empty());
        std::tie(Line, Rest) = Rest.split('\n')) {
@@ -226,12 +236,27 @@ std::string renderText(llvm::StringRef Input, bool StartsLine,
       R.append(LeadingSpaces);
     }
 
+    // Handle the case where the user escaped a character themselves.
+    // This is relevant for markdown code spans if EscapeMarkdown is false,
+    // because if the user escaped a backtick, we must treat the enclosed text
+    // as normal markdown text.
+    bool UserEscape = false;
     for (unsigned I = LeadingSpaces.size(); I < Line.size(); ++I) {
-      if (needsLeadingEscape(Line[I], Line.substr(LeadingSpaces.size(), I),
+
+      if (!EscapeMarkdown && !UserEscape && Line[I] == '`')
+        InCodeSpan = !InCodeSpan;
+
+      if (!InCodeSpan &&
+          needsLeadingEscape(Line[I], Line.substr(LeadingSpaces.size(), I),
                              Line.substr(I + 1), StartsLineIntern,
                              EscapeMarkdown))
         R.push_back('\\');
       R.push_back(Line[I]);
+
+      if (Line[I] == '\\')
+        UserEscape = !UserEscape;
+      else
+        UserEscape = false;
     }
 
     IsFirstLine = false;
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index e9abf71..718c1bc 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -3457,13 +3457,15 @@ template <typename T, typename C = bool> class Foo {}
 ```
 
 ---
-**Template Parameters:**
+### Brief
 
-- `typename T`
-- `typename C = bool`
+documentation
 
 ---
-documentation
+### Template Parameters
+
+- `typename T`
+- `typename C = bool`
 
 ---
 Size: 10 bytes)",
@@ -3506,7 +3508,7 @@ ret_type foo(params) {}
 ```
 
 ---
-**Parameters:**
+### Parameters
 
 - 
 - `type (aka can_type)`
@@ -3514,7 +3516,7 @@ ret_type foo(params) {}
 - `type foo = default (aka can_type)`
 
 ---
-**Returns:**
+### Returns
 
 `ret_type (aka can_ret_type)`)",
       },
@@ -3649,7 +3651,7 @@ protected: size_t method()
 ```
 
 ---
-**Returns:**
+### Returns
 
 `size_t (aka unsigned long)`)",
       },
@@ -3688,7 +3690,7 @@ public: cls(int a, int b = 5)
 ```
 
 ---
-**Parameters:**
+### Parameters
 
 - `int a`
 - `int b = 5`)",
@@ -4001,9 +4003,13 @@ void foo()
 ```
 
 ---
+### Brief
+
 brief doc
 
 ---
+### Details
+
 longer doc)"},
       {[](HoverInfo &HI) {
          HI.Kind = index::SymbolKind::Function;
@@ -4034,20 +4040,34 @@ int foo()
 ```
 
 ---
+### Brief
+
 brief doc
 
 ---
-**Returns:**
+### Returns
 
 `int`
 
 ---
+### Details
+
 longer doc)"},
       {[](HoverInfo &HI) {
          HI.Kind = index::SymbolKind::Function;
-         HI.Documentation = "@brief brief doc\n\n"
-                            "longer doc\n@param a this is a param\n@return it "
-                            "returns something";
+         HI.Documentation = R"(@brief brief doc
+
+longer doc
+@note this is a note
+
+As you see, notes are "inlined".
+@warning this is a warning
+
+As well as warnings
+@param a this is a param
+@return it returns something
+@retval 0 if successful
+@retval 1 if failed)";
          HI.Definition = "int foo(int a)";
          HI.ReturnType = "int";
          HI.Name = "foo";
@@ -4068,8 +4088,16 @@ Parameters:
 @brief brief doc
 
 longer doc
+@note this is a note
+
+As you see, notes are "inlined".
+@warning this is a warning
+
+As well as warnings
 @param a this is a param
 @return it returns something
+@retval 0 if successful
+@retval 1 if failed
 
 ---
 ```cpp
@@ -4083,20 +4111,37 @@ int foo(int a)
 ```
 
 ---
+### Brief
+
 brief doc
 
 ---
-**Parameters:**
+### Parameters
 
 - `int a` - this is a param
 
 ---
-**Returns:**
+### Returns
 
 `int` - it returns something
 
+- `0` - if successful
+- `1` - if failed
+
 ---
-longer doc)"},
+### Details
+
+longer doc
+
+**Note:**  
+this is a note
+
+As you see, notes are "inlined".
+
+**Warning:**  
+this is a warning
+
+As well as warnings)"},
       {[](HoverInfo &HI) {
          HI.Kind = index::SymbolKind::Function;
          HI.Documentation = "@brief brief doc\n\n"
@@ -4138,19 +4183,23 @@ int foo(int a)
 ```
 
 ---
+### Brief
+
 brief doc
 
 ---
-**Parameters:**
+### Parameters
 
 - `int a` - this is a param
 
 ---
-**Returns:**
+### Returns
 
 `int` - it returns something
 
 ---
+### Details
+
 longer doc)"},
   };
 
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
index 31a6a14..b3185cc 100644
--- a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
@@ -15,7 +15,7 @@
 namespace clang {
 namespace clangd {
 
-TEST(SymbolDocumentation, UnhandledDocs) {
+TEST(SymbolDocumentation, DetailedDocToMarkup) {
 
   CommentOptions CommentOpts;
 
@@ -26,52 +26,52 @@ TEST(SymbolDocumentation, UnhandledDocs) {
     llvm::StringRef ExpectedRenderPlainText;
   } Cases[] = {
       {
-          "foo bar",
+          "brief\n\nfoo bar",
           "foo bar",
           "foo bar",
           "foo bar",
       },
       {
-          "foo\nbar\n",
+          "brief\n\nfoo\nbar\n",
           "foo\nbar",
           "foo\nbar",
           "foo bar",
       },
       {
-          "foo\n\nbar\n",
+          "brief\n\nfoo\n\nbar\n",
           "foo\n\nbar",
           "foo\n\nbar",
           "foo\n\nbar",
       },
       {
-          "foo \\p bar baz",
+          "brief\n\nfoo \\p bar baz",
           "foo `bar` baz",
           "foo `bar` baz",
           "foo bar baz",
       },
       {
-          "foo \\e bar baz",
+          "brief\n\nfoo \\e bar baz",
           "foo \\*bar\\* baz",
           "foo *bar* baz",
           "foo *bar* baz",
       },
       {
-          "foo \\b bar baz",
+          "brief\n\nfoo \\b bar baz",
           "foo \\*\\*bar\\*\\* baz",
           "foo **bar** baz",
           "foo **bar** baz",
       },
       {
-          "foo \\ref bar baz",
-          "foo \\*\\*\\\\ref\\*\\* \\*bar\\* baz",
-          "foo **\\ref** *bar* baz",
-          "foo **\\ref** *bar* baz",
+          "brief\n\nfoo \\ref bar baz",
+          "foo \\*\\*\\\\ref\\*\\* `bar` baz",
+          "foo **\\ref** `bar` baz",
+          "foo **\\ref** bar baz",
       },
       {
-          "foo @ref bar baz",
-          "foo \\*\\*@ref\\*\\* \\*bar\\* baz",
-          "foo **@ref** *bar* baz",
-          "foo **@ref** *bar* baz",
+          "brief\n\nfoo @ref bar baz",
+          "foo \\*\\*@ref\\*\\* `bar` baz",
+          "foo **@ref** `bar` baz",
+          "foo **@ref** bar baz",
       },
       {
           "\\brief this is a \\n\nbrief description",
@@ -80,10 +80,10 @@ TEST(SymbolDocumentation, UnhandledDocs) {
           "",
       },
       {
-          "\\throw exception foo",
-          "\\*\\*\\\\throw\\*\\* \\*exception\\* foo",
-          "**\\throw** *exception* foo",
-          "**\\throw** *exception* foo",
+          "brief\n\n\\throw exception foo",
+          "\\*\\*\\\\throw\\*\\* `exception` foo",
+          "**\\throw** `exception` foo",
+          "**\\throw** exception foo",
       },
       {
           R"(\brief this is a brief description
@@ -108,7 +108,8 @@ TEST(SymbolDocumentation, UnhandledDocs) {
 - item 3)",
       },
       {
-          "\\defgroup mygroup this is a group\nthis is not a group description",
+          "brief\n\n\\defgroup mygroup this is a group\nthis is not a group "
+          "description",
           "\\*\\*@defgroup\\*\\* `mygroup this is a group`\n\nthis is not a "
           "group "
           "description",
@@ -118,7 +119,9 @@ TEST(SymbolDocumentation, UnhandledDocs) {
           "description",
       },
       {
-          R"(\verbatim
+          R"(brief
+
+\verbatim
 this is a
 verbatim block containing
 some verbatim text
@@ -150,7 +153,7 @@ some verbatim text
 **@endverbatim**)",
       },
       {
-          "@param foo this is a parameter\n@param bar this is another "
+          "brief\n\n@param foo this is a parameter\n@param bar this is another "
           "parameter",
           "",
           "",
@@ -169,24 +172,26 @@ More description
 documentation)",
           R"(\*\*\\brief\*\* another brief?
 
-\*\*\\details\*\* these are details
+these are details
 
 More description
 documentation)",
           R"(**\brief** another brief?
 
-**\details** these are details
+these are details
 
 More description
 documentation)",
           R"(**\brief** another brief?
 
-**\details** these are details
+these are details
 
 More description documentation)",
       },
       {
-          R"(<b>this is a bold text</b>
+          R"(brief
+
+<b>this is a bold text</b>
 normal text<i>this is an italic text</i>
 <code>this is a code block</code>)",
           R"(\<b>this is a bold text\</b>
@@ -198,12 +203,528 @@ normal text\<i>this is an italic text\</i>
           "<b>this is a bold text</b> normal text<i>this is an italic text</i> "
           "<code>this is a code block</code>",
       },
+      {"brief\n\n@note This is a note",
+       R"(\*\*Note:\*\*  
+This is a note)",
+       R"(**Note:**  
+This is a note)",
+       R"(**Note:**
+This is a note)"},
+      {R"(brief
+
+Paragraph 1
+@note This is a note
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+\*\*Note:\*\*  
+This is a note
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+**Note:**  
+This is a note
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+**Note:**
+This is a note
+
+Paragraph 2)"},
+      {"brief\n\n@warning This is a warning",
+       R"(\*\*Warning:\*\*  
+This is a warning)",
+       R"(**Warning:**  
+This is a warning)",
+       R"(**Warning:**
+This is a warning)"},
+      {R"(brief
+
+Paragraph 1
+@warning This is a warning
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+\*\*Warning:\*\*  
+This is a warning
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+**Warning:**  
+This is a warning
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+**Warning:**
+This is a warning
+
+Paragraph 2)"},
+      {R"(@note this is not treated as brief
+
+@brief this is the brief
+
+Another paragraph)",
+       R"(\*\*Note:\*\*  
+this is not treated as brief
+
+Another paragraph)",
+       R"(**Note:**  
+this is not treated as brief
+
+Another paragraph)",
+       R"(**Note:**
+this is not treated as brief
+
+Another paragraph)"},
+      {R"(
+@brief Some brief
+)",
+       "", "", ""},
+      {R"(
+Some brief
+)",
+       "", "", ""},
+  };
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.detailedDocToMarkup(Doc);
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, RetvalCommand) {
+
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;
+    llvm::StringRef ExpectedRenderEscapedMarkdown;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {
+      {"@retval", "", "", ""},
+      {R"(@retval MyReturnVal
+@retval MyOtherReturnVal)",
+       R"(- `MyReturnVal`
+- `MyOtherReturnVal`)",
+       R"(- `MyReturnVal`
+- `MyOtherReturnVal`)",
+       R"(- MyReturnVal
+- MyOtherReturnVal)"},
+      {R"(@retval MyReturnVal if foo
+@retval MyOtherReturnVal if bar)",
+       R"(- `MyReturnVal` - if foo
+- `MyOtherReturnVal` - if bar)",
+       R"(- `MyReturnVal` - if foo
+- `MyOtherReturnVal` - if bar)",
+       R"(- MyReturnVal - if foo
+- MyOtherReturnVal - if bar)"},
+  };
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.retvalsToMarkup(Doc);
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, DoxygenCodeBlocks) {
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;
+    llvm::StringRef ExpectedRenderEscapedMarkdown;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {
+      {R"(@code
+int code() { return 0; }
+@endcode
+@code{.cpp}
+int code_lang() { return 0; }
+@endcode
+@code{.c++}
+int code_lang_plus() { return 0; }
+@endcode
+@code{.py}
+class A:
+    pass
+@endcode
+@code{nolang}
+class B:
+    pass
+@endcode)",
+       R"(```
+int code() { return 0; }
+```
+
+```cpp
+int code_lang() { return 0; }
+```
+
+```c++
+int code_lang_plus() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```nolang
+class B:
+    pass
+```)",
+       R"(```
+int code() { return 0; }
+```
+
+```cpp
+int code_lang() { return 0; }
+```
+
+```c++
+int code_lang_plus() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```nolang
+class B:
+    pass
+```)",
+       R"(int code() { return 0; }
+
+int code_lang() { return 0; }
+
+int code_lang_plus() { return 0; }
+
+class A:
+    pass
+
+class B:
+    pass)"},
+  };
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.detailedDocToMarkup(Doc);
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, MarkdownCodeBlocks) {
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;
+    llvm::StringRef ExpectedRenderEscapedMarkdown;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {
+      {R"(```
+int backticks() { return 0; }
+```
+```cpp
+int backticks_lang() { return 0; }
+```
+```c++
+int backticks_lang_plus() { return 0; }
+```
+~~~
+int tilde() { return 0; }
+~~~
+~~~~~~~~~~~~~~~~~~~~~~~~
+int tilde_many() { return 0; }
+~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~{.c++}
+int tilde_many_lang() { return 0; }
+~~~~~~~~~~~~~~~~~~~~~~~~
+```py
+class A:
+    pass
+```
+```python
+class B:
+    pass
+```
+~~~{.python}
+class C:
+    pass
+~~~
+)",
+       R"(```
+int backticks() { return 0; }
+```
+
+```cpp
+int backticks_lang() { return 0; }
+```
+
+```c++
+int backticks_lang_plus() { return 0; }
+```
+
+```
+int tilde() { return 0; }
+```
+
+```
+int tilde_many() { return 0; }
+```
+
+```c++
+int tilde_many_lang() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```python
+class B:
+    pass
+```
+
+```python
+class C:
+    pass
+```)",
+       R"(```
+int backticks() { return 0; }
+```
+
+```cpp
+int backticks_lang() { return 0; }
+```
+
+```c++
+int backticks_lang_plus() { return 0; }
+```
+
+```
+int tilde() { return 0; }
+```
+
+```
+int tilde_many() { return 0; }
+```
+
+```c++
+int tilde_many_lang() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```python
+class B:
+    pass
+```
+
+```python
+class C:
+    pass
+```)",
+       R"(int backticks() { return 0; }
+
+int backticks_lang() { return 0; }
+
+int backticks_lang_plus() { return 0; }
+
+int tilde() { return 0; }
+
+int tilde_many() { return 0; }
+
+int tilde_many_lang() { return 0; }
+
+class A:
+    pass
+
+class B:
+    pass
+
+class C:
+    pass)"},
+      {R"(```
+// this code block is missing end backticks
+
+)",
+       R"(```
+// this code block is missing end backticks
+```)",
+       R"(```
+// this code block is missing end backticks
+```)",
+       R"(// this code block is missing end backticks)"},
+  };
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.detailedDocToMarkup(Doc);
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, MarkdownCodeBlocksSeparation) {
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;
+    llvm::StringRef ExpectedRenderEscapedMarkdown;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {
+      {R"(@note Show that code blocks are correctly separated
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(\*\*Note:\*\*  
+Show that code blocks are correctly separated
+
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**  
+Show that code blocks are correctly separated
+
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**
+Show that code blocks are correctly separated
+
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; })"},
+      {R"(@note Show that code blocks are correctly separated
+~~~~~~~~~
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+~~~~~~~~~)",
+       R"(\*\*Note:\*\*  
+Show that code blocks are correctly separated
+
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**  
+Show that code blocks are correctly separated
+
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**
+Show that code blocks are correctly separated
+
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; })"},
+  };
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.detailedDocToMarkup(Doc);
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, MarkdownCodeSpans) {
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;
+    llvm::StringRef ExpectedRenderEscapedMarkdown;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {
+      {R"(`this is a code span with @p and \c inside`)",
+       R"(\`this is a code span with @p and \\c inside\`)",
+       R"(`this is a code span with @p and \c inside`)",
+       R"(`this is a code span with @p and \c inside`)"},
+      {R"(<escaped> `<not-escaped>`)", R"(\<escaped> \`\<not-escaped>\`)",
+       R"(\<escaped> `<not-escaped>`)", R"(<escaped> `<not-escaped>`)"},
+      {R"(<escaped> \`<escaped> doxygen commands not parsed @p, \c, @note, \warning \`)",
+       R"(\<escaped> \\\`\<escaped> doxygen commands not parsed @p, \\c, @note, \\warning \\\`)",
+       R"(\<escaped> \`\<escaped> doxygen commands not parsed @p, \c, @note, \warning \`)",
+       R"(<escaped> \`<escaped> doxygen commands not parsed @p, \c, @note, \warning \`)"},
+      {R"(`multi
+line
+\c span`)",
+       R"(\`multi
+line
+\\c span\`)",
+       R"(`multi
+line
+\c span`)",
+       R"(`multi line
+\c span`)"},
   };
   for (const auto &C : Cases) {
     markup::Document Doc;
     SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
 
-    SymbolDoc.docToMarkup(Doc);
+    SymbolDoc.briefToMarkup(Doc.addParagraph());
 
     EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
     EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 4139184..a324d18 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -372,7 +372,7 @@ Clang-Tidy Checks
    :doc:`readability-avoid-nested-conditional-operator <readability/avoid-nested-conditional-operator>`,
    :doc:`readability-avoid-return-with-void-value <readability/avoid-return-with-void-value>`, "Yes"
    :doc:`readability-avoid-unconditional-preprocessor-if <readability/avoid-unconditional-preprocessor-if>`,
-   :doc:`readability-braces-around-statements <readability/braces-around-statements>`,
+   :doc:`readability-braces-around-statements <readability/braces-around-statements>`, "Yes"
    :doc:`readability-const-return-type <readability/const-return-type>`, "Yes"
    :doc:`readability-container-contains <readability/container-contains>`, "Yes"
    :doc:`readability-container-data-pointer <readability/container-data-pointer>`, "Yes"
@@ -571,12 +571,12 @@ Check aliases
    :doc:`cppcoreguidelines-non-private-member-variables-in-classes <cppcoreguidelines/non-private-member-variables-in-classes>`, :doc:`misc-non-private-member-variables-in-classes <misc/non-private-member-variables-in-classes>`,
    :doc:`cppcoreguidelines-use-default-member-init <cppcoreguidelines/use-default-member-init>`, :doc:`modernize-use-default-member-init <modernize/use-default-member-init>`, "Yes"
    :doc:`fuchsia-header-anon-namespaces <fuchsia/header-anon-namespaces>`, :doc:`google-build-namespaces <google/build-namespaces>`,
-   :doc:`google-readability-braces-around-statements <google/readability-braces-around-statements>`, :doc:`readability-braces-around-statements <readability/braces-around-statements>`,
+   :doc:`google-readability-braces-around-statements <google/readability-braces-around-statements>`, :doc:`readability-braces-around-statements <readability/braces-around-statements>`, "Yes"
    :doc:`google-readability-function-size <google/readability-function-size>`, :doc:`readability-function-size <readability/function-size>`,
    :doc:`google-readability-namespace-comments <google/readability-namespace-comments>`, :doc:`llvm-namespace-comment <llvm/namespace-comment>`,
    :doc:`hicpp-avoid-c-arrays <hicpp/avoid-c-arrays>`, :doc:`modernize-avoid-c-arrays <modernize/avoid-c-arrays>`,
    :doc:`hicpp-avoid-goto <hicpp/avoid-goto>`, :doc:`cppcoreguidelines-avoid-goto <cppcoreguidelines/avoid-goto>`,
-   :doc:`hicpp-braces-around-statements <hicpp/braces-around-statements>`, :doc:`readability-braces-around-statements <readability/braces-around-statements>`,
+   :doc:`hicpp-braces-around-statements <hicpp/braces-around-statements>`, :doc:`readability-braces-around-statements <readability/braces-around-statements>`, "Yes"
    :doc:`hicpp-deprecated-headers <hicpp/deprecated-headers>`, :doc:`modernize-deprecated-headers <modernize/deprecated-headers>`, "Yes"
    :doc:`hicpp-explicit-conversions <hicpp/explicit-conversions>`, :doc:`google-explicit-constructor <google/explicit-constructor>`, "Yes"
    :doc:`hicpp-function-size <hicpp/function-size>`, :doc:`readability-function-size <readability/function-size>`,
diff --git a/clang/docs/DebuggingCoroutines.rst b/clang/docs/DebuggingCoroutines.rst
index 9eaf8d4..c62e2ea 100644
--- a/clang/docs/DebuggingCoroutines.rst
+++ b/clang/docs/DebuggingCoroutines.rst
@@ -179,8 +179,8 @@ generator and its internal state.
 
 To do so, we can simply look into the ``gen.hdl`` variable. LLDB comes with a
 pretty printer for ``std::coroutine_handle`` which will show us the internal
-state of the coroutine. For GDB, you will have to use the ``show-coro-frame``
-command provided by the :ref:`gdb-script`.
+state of the coroutine. For GDB, the pretty printer is provided by a script,
+see :ref:`gdb-script` for setup instructions.
 
 .. image:: ./coro-generator-suspended.png
 
@@ -206,23 +206,16 @@ Tracking the exact suspension point
 
 Among the compiler-generated members, the ``__coro_index`` is particularly
 important. This member identifies the suspension point at which the coroutine
-is currently suspended.
+is currently suspended. However, it is non-trivial to map this number back to
+a source code location.
 
-However, it is non-trivial to map this number back to a source code location.
-The compiler emits debug info labels for the suspension points. This allows us
-to map the suspension point index back to a source code location. In gdb, we
-can use the ``info line`` command to get the source code location of the
-suspension point.
+For GDB, the provided :ref:`gdb-script` already takes care of this and provides
+the exact line number of the suspension point as part of the coroutine handle's
+summary string. Unfortunately, LLDB's pretty-printer does not support this, yet.
+Furthermore, those labels are only emitted starting with clang 21.0.
 
-::
-
-  (gdb) info line -function coro_task -label __coro_resume_2
-  Line 45 of "llvm-example.cpp" starts at address 0x1b1b <_ZL9coro_taski.resume+555> and ends at 0x1b46 <_ZL9coro_taski.resume+598>.
-  Line 45 of "llvm-example.cpp" starts at address 0x201b <_ZL9coro_taski.destroy+555> and ends at 0x2046 <_ZL9coro_taski.destroy+598>.
-  Line 45 of "llvm-example.cpp" starts at address 0x253b <_ZL9coro_taski.cleanup+555> and ends at 0x2566 <_ZL9coro_taski.cleanup+598>.
-
-LLDB does not support looking up labels. Furthermore, those labels are only emitted
-starting with clang 21.0.
+When debugging with LLDB or when using older clang versions, we will have to use
+a different approach.
 
 For simple cases, you might still be able to guess the suspension point correctly.
 Alternatively, you might also want to modify your coroutine library to store
@@ -655,33 +648,17 @@ There are two possible approaches to do so:
    We can lookup their types and thereby get the types of promise
    and coroutine frame.
 
-In gdb, one can use the following approach to devirtualize a coroutine type,
-assuming we have a ``std::coroutine_handle`` is at address 0x418eb0:
-
-::
+In general, the second approach is preferred, as it is more portable.
 
-  (gdb) # Get the address of coroutine frame
-  (gdb) print/x *0x418eb0
-  $1 = 0x4019e0
-  (gdb) # Get the linkage name for the coroutine
-  (gdb) x 0x4019e0
-  0x4019e0 <_ZL9coro_taski>:  0xe5894855
-  (gdb) # Turn off the demangler temporarily to avoid the debugger misunderstanding the name.
-  (gdb) set demangle-style none
-  (gdb) # The coroutine frame type is 'linkage_name.coro_frame_ty'
-  (gdb) print  ('_ZL9coro_taski.coro_frame_ty')*(0x418eb0)
-  $2 = {__resume_fn = 0x4019e0 <coro_task(int)>, __destroy_fn = 0x402000 <coro_task(int)>, __promise = {...}, ...}
-
-In practice, one would use the ``show-coro-frame`` command provided by the
-:ref:`gdb-script`.
+To do so, we look up the types in the destroy function and not the resume function
+because the resume function pointer will be set to a ``nullptr`` as soon as a
+coroutine reaches its final suspension point. If we used the resume function,
+devirtualization would hence fail for all coroutines that have reached their final
+suspension point.
 
 LLDB comes with devirtualization support out of the box, as part of the
-pretty-printer for ``std::coroutine_handle``. Internally, this pretty-printer
-uses the second approach. We look up the types in the destroy function and not
-the resume function because the resume function pointer will be set to a
-``nullptr`` as soon as a coroutine reaches its final suspension point. If we used
-the resume function, devirtualization would hence fail for all coroutines that
-have reached their final suspension point.
+pretty-printer for ``std::coroutine_handle``. For GDB, a similar pretty-printer
+is provided by the :ref:`gdb-script`.
 
 Interpreting the coroutine frame in optimized builds
 ----------------------------------------------------
@@ -756,6 +733,26 @@ should not be thought of as directly representing the variables in the C++
 source.
 
 
+Mapping suspension point indices to source code locations
+---------------------------------------------------------
+
+To aid in mapping a ``__coro_index`` back to a source code location, clang 21.0
+and newer emit special, compiler-generated labels for the suspension points.
+
+In gdb, we can use the ``info line`` command to get the source code location of
+the suspension point.
+
+::
+
+  (gdb) info line -function coro_task -label __coro_resume_2
+  Line 45 of "llvm-example.cpp" starts at address 0x1b1b <_ZL9coro_taski.resume+555> and ends at 0x1b46 <_ZL9coro_taski.resume+598>.
+  Line 45 of "llvm-example.cpp" starts at address 0x201b <_ZL9coro_taski.destroy+555> and ends at 0x2046 <_ZL9coro_taski.destroy+598>.
+  Line 45 of "llvm-example.cpp" starts at address 0x253b <_ZL9coro_taski.cleanup+555> and ends at 0x2566 <_ZL9coro_taski.cleanup+598>.
+
+LLDB does not support looking up labels, yet. For this reason, LLDB's pretty-printer
+does not show the exact line number of the suspension point.
+
+
 Resources
 =========
 
@@ -1017,156 +1014,270 @@ Note that this script requires LLDB 21.0 or newer.
 GDB Debugger Script
 -------------------
 
-For GDB, the following script provides a couple of useful commands:
+The following script provides:
 
-* ``async-bt`` to print the stack trace of a coroutine
-* ``show-coro-frame`` to print the coroutine frame, similar to
-  LLDB's builtin pretty-printer for coroutine frames
+* a pretty-printer for coroutine handles
+* a frame filter to add coroutine frames to the built-in ``bt`` command
+* the ``get_coro_frame`` and ``get_coro_promise`` functions to be used in
+  expressions, e.g. ``p get_coro_promise(fib.coro_hdl)->current_state``
+
+It can be loaded into GDB using ``source gdb_coro_debugging.py``.
+To load this by default, add this command to your ``~/.gdbinit`` file.
 
 .. code-block:: python
 
-  # debugging-helper.py
+  # gdb_coro_debugging.py
   import gdb
   from gdb.FrameDecorator import FrameDecorator
 
-  class SymValueWrapper():
-      def __init__(self, symbol, value):
-          self.sym = symbol
-          self.val = value
+  import typing
+  import re
 
-      def __str__(self):
-          return str(self.sym) + " = " + str(self.val)
+  def _load_pointer_at(addr: int):
+      return gdb.Value(addr).reinterpret_cast(gdb.lookup_type('void').pointer().pointer()).dereference()
 
-  def get_long_pointer_size():
-      return gdb.lookup_type('long').pointer().sizeof
+  """
+  Devirtualized coroutine frame.
 
-  def cast_addr2long_pointer(addr):
-      return gdb.Value(addr).cast(gdb.lookup_type('long').pointer())
+  Devirtualizes the promise and frame pointer types by inspecting
+  the destroy function.
 
-  def dereference(addr):
-      return long(cast_addr2long_pointer(addr).dereference())
+  Implements `to_string` and `children` to be used by `gdb.printing.PrettyPrinter`.
+  Base class for `CoroutineHandlePrinter`.
+  """
+  class DevirtualizedCoroFrame:
+      def __init__(self, frame_ptr_raw: int, val: gdb.Value | None = None):
+          self.val = val
+          self.frame_ptr_raw = frame_ptr_raw
 
-  class CoroutineFrame(object):
-      def __init__(self, task_addr):
-          self.frame_addr = task_addr
-          self.resume_addr = task_addr
-          self.destroy_addr = task_addr + get_long_pointer_size()
-          self.promise_addr = task_addr + get_long_pointer_size() * 2
-          # In the example, the continuation is the first field member of the promise_type.
-          # So they have the same addresses.
-          # If we want to generalize the scripts to other coroutine types, we need to be sure
-          # the continuation field is the first member of promise_type.
-          self.continuation_addr = self.promise_addr
+          # Get the resume and destroy pointers.
+          if frame_ptr_raw == 0:
+              self.resume_ptr = None
+              self.destroy_ptr = None
+              self.promise_ptr = None
+              self.frame_ptr = gdb.Value(frame_ptr_raw).reinterpret_cast(gdb.lookup_type("void").pointer())
+              return
 
-      def next_task_addr(self):
-          return dereference(self.continuation_addr)
+          # Get the resume and destroy pointers.
+          self.resume_ptr = _load_pointer_at(frame_ptr_raw)
+          self.destroy_ptr = _load_pointer_at(frame_ptr_raw + 8)
+
+          # Devirtualize the promise and frame pointer types.
+          frame_type = gdb.lookup_type("void")
+          promise_type = gdb.lookup_type("void")
+          self.destroy_func = gdb.block_for_pc(int(self.destroy_ptr))
+          if self.destroy_func is not None:
+              frame_var = gdb.lookup_symbol("__coro_frame", self.destroy_func, gdb.SYMBOL_VAR_DOMAIN)[0]
+              if frame_var is not None:
+                  frame_type = frame_var.type
+              promise_var = gdb.lookup_symbol("__promise", self.destroy_func, gdb.SYMBOL_VAR_DOMAIN)[0]
+              if promise_var is not None:
+                  promise_type = promise_var.type.strip_typedefs()
+
+          # If the type has a template argument, prefer it over the devirtualized type.
+          if self.val is not None:
+              promise_type_template_arg = self.val.type.template_argument(0)
+              if promise_type_template_arg is not None and promise_type_template_arg.code != gdb.TYPE_CODE_VOID:
+                  promise_type = promise_type_template_arg
+
+          self.promise_ptr = gdb.Value(frame_ptr_raw + 16).reinterpret_cast(promise_type.pointer())
+          self.frame_ptr = gdb.Value(frame_ptr_raw).reinterpret_cast(frame_type.pointer())
+
+          # Try to get the suspension point index and look up the exact line entry.
+          self.suspension_point_index = int(self.frame_ptr.dereference()["__coro_index"]) if frame_type.code == gdb.TYPE_CODE_STRUCT else None
+          self.resume_func = gdb.block_for_pc(int(self.resume_ptr))
+          self.resume_label = None
+          if self.resume_func is not None and self.suspension_point_index is not None:
+              label_name = f"__coro_resume_{self.suspension_point_index}"
+              self.resume_label = gdb.lookup_symbol(label_name, self.resume_func, gdb.SYMBOL_LABEL_DOMAIN)[0]
+
+      def get_function_name(self):
+          if self.destroy_func is None:
+              return None
+          name = self.destroy_func.function.name
+          # Strip the "clone" suffix if it exists.
+          if "() [clone " in name:
+              name = name[:name.index("() [clone ")]
+          return name
+
+      def to_string(self):
+          result = "coro(" + str(self.frame_ptr_raw) + ")"
+          if self.destroy_func is not None:
+              result += ": " + self.get_function_name()
+          if self.resume_label is not None:
+              result += ", line " + str(self.resume_label.line)
+          if self.suspension_point_index is not None:
+              result += ", suspension point " + str(self.suspension_point_index)
+          return result
+
+      def children(self):
+          if self.resume_ptr is None:
+              return [
+                  ("coro_frame", self.frame_ptr),
+              ]
+          else:
+              return [
+                  ("resume", self.resume_ptr),
+                  ("destroy", self.destroy_ptr),
+                  ("promise", self.promise_ptr),
+                  ("coro_frame", self.frame_ptr)
+              ]
 
-  class CoroutineFrameDecorator(FrameDecorator):
-      def __init__(self, coro_frame):
-          super(CoroutineFrameDecorator, self).__init__(None)
-          self.coro_frame = coro_frame
-          self.resume_func = dereference(self.coro_frame.resume_addr)
-          self.resume_func_block = gdb.block_for_pc(self.resume_func)
-          if self.resume_func_block is None:
-              raise Exception('Not stackless coroutine.')
-          self.line_info = gdb.find_pc_line(self.resume_func)
 
-      def address(self):
-          return self.resume_func
+  # Works for both libc++ and libstdc++.
+  libcxx_corohdl_regex = re.compile('^std::__[A-Za-z0-9]+::coroutine_handle<.+>$|^std::coroutine_handle<.+>(( )?&)?$')
 
-      def filename(self):
-          return self.line_info.symtab.filename
+  def _extract_coro_frame_ptr_from_handle(val: gdb.Value):
+      if libcxx_corohdl_regex.match(val.type.strip_typedefs().name) is None:
+          raise ValueError("Expected a std::coroutine_handle, got %s" % val.type.strip_typedefs().name)
 
-      def frame_args(self):
-          return [SymValueWrapper("frame_addr", cast_addr2long_pointer(self.coro_frame.frame_addr)),
-                  SymValueWrapper("promise_addr", cast_addr2long_pointer(self.coro_frame.promise_addr)),
-                  SymValueWrapper("continuation_addr", cast_addr2long_pointer(self.coro_frame.continuation_addr))
-                  ]
+      # We expect the coroutine handle to have a single field, which is the frame pointer.
+      # This heuristic works for both libc++ and libstdc++.
+      fields = val.type.fields()
+      if len(fields) != 1:
+          raise ValueError("Expected 1 field, got %d" % len(fields))
+      return int(val[fields[0]])
 
-      def function(self):
-          return self.resume_func_block.function.print_name
 
-      def line(self):
-          return self.line_info.line
-
-  class StripDecorator(FrameDecorator):
-      def __init__(self, frame):
-          super(StripDecorator, self).__init__(frame)
-          self.frame = frame
-          f = frame.function()
-          self.function_name = f
-
-      def __str__(self, shift = 2):
-          addr = "" if self.address() is None else '%#x' % self.address() + " in "
-          location = "" if self.filename() is None else " at " + self.filename() + ":" + str(self.line())
-          return addr + self.function() + " " + str([str(args) for args in self.frame_args()]) + location
-
-  class CoroutineFilter:
-      def create_coroutine_frames(self, task_addr):
-          frames = []
-          while task_addr != 0:
-              coro_frame = CoroutineFrame(task_addr)
-              frames.append(CoroutineFrameDecorator(coro_frame))
-              task_addr = coro_frame.next_task_addr()
-          return frames
-
-  class AsyncStack(gdb.Command):
+  """
+  Pretty printer for `std::coroutine_handle<T>`
+
+  Works for both libc++ and libstdc++.
+
+  It prints the coroutine handle as a struct with the following fields:
+  - resume: the resume function pointer
+  - destroy: the destroy function pointer
+  - promise: the promise pointer
+  - coro_frame: the coroutine frame pointer
+
+  Most of the functionality is implemented in `DevirtualizedCoroFrame`.
+  """
+  class CoroutineHandlePrinter(DevirtualizedCoroFrame):
+      def __init__(self, val : gdb.Value):
+          frame_ptr_raw = _extract_coro_frame_ptr_from_handle(val)
+          super(CoroutineHandlePrinter, self).__init__(frame_ptr_raw, val)
+
+
+  def build_pretty_printer():
+      pp = gdb.printing.RegexpCollectionPrettyPrinter("coroutine")
+      pp.add_printer('std::coroutine_handle', libcxx_corohdl_regex, CoroutineHandlePrinter)
+      return pp
+
+  gdb.printing.register_pretty_printer(
+      gdb.current_objfile(),
+      build_pretty_printer())
+
+
+  """
+  Get the coroutine frame pointer from a coroutine handle.
+
+  Usage:
+  ```
+  p *get_coro_frame(coroutine_hdl)
+  ```
+  """
+  class GetCoroFrame(gdb.Function):
       def __init__(self):
-          super(AsyncStack, self).__init__("async-bt", gdb.COMMAND_USER)
+          super(GetCoroFrame, self).__init__("get_coro_frame")
 
-      def invoke(self, arg, from_tty):
-          coroutine_filter = CoroutineFilter()
-          argv = gdb.string_to_argv(arg)
-          if len(argv) == 0:
-              try:
-                  task = gdb.parse_and_eval('__coro_frame')
-                  task = int(str(task.address), 16)
-              except Exception:
-                  print ("Can't find __coro_frame in current context.\n" +
-                        "Please use `async-bt` in stackless coroutine context.")
-                  return
-          elif len(argv) != 1:
-              print("usage: async-bt <pointer to task>")
-              return
-          else:
-              task = int(argv[0], 16)
+      def invoke(self, coroutine_hdl_raw):
+          return CoroutineHandlePrinter(coroutine_hdl_raw).frame_ptr
+
+  GetCoroFrame()
 
-          frames = coroutine_filter.create_coroutine_frames(task)
-          i = 0
-          for f in frames:
-              print '#'+ str(i), str(StripDecorator(f))
-              i += 1
-          return
 
-  AsyncStack()
+  """
+  Get the coroutine frame pointer from a coroutine handle.
 
-  class ShowCoroFrame(gdb.Command):
+  Usage:
+  ```
+  p *get_coro_promise(coroutine_hdl)
+  ```
+  """
+  class GetCoroFrame(gdb.Function):
       def __init__(self):
-          super(ShowCoroFrame, self).__init__("show-coro-frame", gdb.COMMAND_USER)
+          super(GetCoroFrame, self).__init__("get_coro_promise")
 
-      def invoke(self, arg, from_tty):
-          argv = gdb.string_to_argv(arg)
-          if len(argv) != 1:
-              print("usage: show-coro-frame <address of coroutine frame>")
-              return
+      def invoke(self, coroutine_hdl_raw):
+          return CoroutineHandlePrinter(coroutine_hdl_raw).promise_ptr
 
-          addr = int(argv[0], 16)
-          block = gdb.block_for_pc(long(cast_addr2long_pointer(addr).dereference()))
-          if block is None:
-              print "block " + str(addr) + " is None."
-              return
+  GetCoroFrame()
+
+
+  """
+  Decorator for coroutine frames.
+
+  Used by `CoroutineFrameFilter` to add the coroutine frames to the built-in `bt` command.
+  """
+  class CoroutineFrameDecorator(FrameDecorator):
+      def __init__(self, coro_frame: DevirtualizedCoroFrame, inferior_frame: gdb.Frame):
+          super(CoroutineFrameDecorator, self).__init__(inferior_frame)
+          self.coro_frame = coro_frame
+
+      def function(self):
+          func_name = self.coro_frame.get_function_name()
+          if func_name is not None:
+              return "[async] " + func_name
+          return "[async] coroutine (coro_frame=" + str(self.coro_frame.frame_ptr_raw) + ")"
+
+      def address(self):
+          return None
+
+      def filename(self):
+          if self.coro_frame.destroy_func is not None:
+              return self.coro_frame.destroy_func.function.symtab.filename
+          return None
+
+      def line(self):
+          if self.coro_frame.resume_label is not None:
+              return self.coro_frame.resume_label.line
+          return None
+
+      def frame_args(self):
+          return []
+
+      def frame_locals(self):
+          return []
+
+
+  def _get_continuation(promise: gdb.Value) -> DevirtualizedCoroFrame | None:
+      try:
+          # TODO: adjust this according for your coroutine framework
+          return DevirtualizedCoroFrame(_extract_coro_frame_ptr_from_handle(promise["continuation"]))
+      except Exception as e:
+          return None
 
-          # Disable demangling since gdb will treat names starting with `_Z`(The marker for Itanium ABI) specially.
-          gdb.execute("set demangle-style none")
 
-          coro_frame_type = gdb.lookup_type(block.function.linkage_name + ".coro_frame_ty")
-          coro_frame_ptr_type = coro_frame_type.pointer()
-          coro_frame = gdb.Value(addr).cast(coro_frame_ptr_type).dereference()
+  def _create_coroutine_frames(coro_frame: DevirtualizedCoroFrame, inferior_frame: gdb.Frame):
+      while coro_frame is not None:
+          yield CoroutineFrameDecorator(coro_frame, inferior_frame)
+          coro_frame = _get_continuation(coro_frame.promise_ptr)
 
-          gdb.execute("set demangle-style auto")
-          gdb.write(coro_frame.format_string(pretty_structs = True))
 
-  ShowCoroFrame()
+  """
+  Frame filter to add coroutine frames to the built-in `bt` command.
+  """
+  class CppCoroutineFrameFilter():
+      def __init__(self):
+          self.name = "CppCoroutineFrameFilter"
+          self.priority = 50
+          self.enabled = True
+          # Register this frame filter with the global frame_filters dictionary.
+          gdb.frame_filters[self.name] = self
+
+      def filter(self, frame_iter: typing.Iterable[gdb.FrameDecorator]):
+          for frame in frame_iter:
+              yield frame
+              inferior_frame = frame.inferior_frame()
+              try:
+                  promise_ptr = inferior_frame.read_var("__promise")
+              except Exception:
+                  continue
+              parent_coro = _get_continuation(promise_ptr)
+              if parent_coro is not None:
+                  yield from _create_coroutine_frames(parent_coro, inferior_frame)
+
+  CppCoroutineFrameFilter()
 
 Further Reading
 ---------------
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 12c2ada..5745e4b 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2778,7 +2778,7 @@ usual build cycle when using sample profilers for optimization:
 
      > clang-cl /O2 -gdwarf -gline-tables-only ^
        /clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names ^
-       code.cc /Fe:code /fuse-ld=lld /link /debug:dwarf
+       code.cc /Fe:code -fuse-ld=lld /link /debug:dwarf
 
 .. note::
 
@@ -2861,13 +2861,15 @@ usual build cycle when using sample profilers for optimization:
    that executes faster than the original one. Note that you are not
    required to build the code with the exact same arguments that you
    used in the first step. The only requirement is that you build the code
-   with the same debug info options and ``-fprofile-sample-use``.
+   with the same debug info options and ``-fprofile-sample-use``. ``-gdwarf``
+   and ``-gline-tables-only`` can be omitted if you do not need debug info
+   in the final binary.
 
    On Linux:
 
    .. code-block:: console
 
-     $ clang++ -O2 -gline-tables-only \
+     $ clang++ -O2 \
        -fdebug-info-for-profiling -funique-internal-linkage-names \
        -fprofile-sample-use=code.prof code.cc -o code
 
@@ -2875,9 +2877,9 @@ usual build cycle when using sample profilers for optimization:
 
    .. code-block:: winbatch
 
-     > clang-cl /O2 -gdwarf -gline-tables-only ^
+     > clang-cl /O2 ^
        /clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names ^
-       -fprofile-sample-use=code.prof code.cc /Fe:code -fuse-ld=lld /link /debug:dwarf
+       -fprofile-sample-use=code.prof code.cc /Fe:code
 
    [OPTIONAL] Sampling-based profiles can have inaccuracies or missing block/
    edge counters. The profile inference algorithm (profi) can be used to infer
@@ -2886,7 +2888,7 @@ usual build cycle when using sample profilers for optimization:
 
    .. code-block:: console
 
-     $ clang++ -fsample-profile-use-profi -O2 -gline-tables-only \
+     $ clang++ -fsample-profile-use-profi -O2 \
        -fdebug-info-for-profiling -funique-internal-linkage-names \
        -fprofile-sample-use=code.prof code.cc -o code
 
@@ -2894,9 +2896,9 @@ usual build cycle when using sample profilers for optimization:
 
    .. code-block:: winbatch
 
-     > clang-cl /clang:-fsample-profile-use-profi /O2 -gdwarf -gline-tables-only ^
+     > clang-cl /clang:-fsample-profile-use-profi /O2 ^
        /clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names ^
-       -fprofile-sample-use=code.prof code.cc /Fe:code -fuse-ld=lld /link /debug:dwarf
+       -fprofile-sample-use=code.prof code.cc /Fe:code
 
 Sample Profile Formats
 """"""""""""""""""""""
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index e301cf1..0d2316f 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -2173,7 +2173,7 @@ private:
     if (Tok.is(tok::kw___attribute)) {
       ParsedAttributes Attrs(AttrFactory);
       ParseGNUAttributes(Attrs, LateAttrs, &D);
-      D.takeAttributes(Attrs);
+      D.takeAttributesAppending(Attrs);
     }
   }
 
@@ -2272,7 +2272,7 @@ private:
     if (isAllowedCXX11AttributeSpecifier()) {
       ParsedAttributes Attrs(AttrFactory);
       ParseCXX11Attributes(Attrs);
-      D.takeAttributes(Attrs);
+      D.takeAttributesAppending(Attrs);
     }
   }
 
@@ -2292,7 +2292,7 @@ private:
       ParsedAttributes AttrsWithRange(AttrFactory);
       ParseMicrosoftAttributes(AttrsWithRange);
       AttrsParsed = !AttrsWithRange.empty();
-      Attrs.takeAllFrom(AttrsWithRange);
+      Attrs.takeAllAppendingFrom(AttrsWithRange);
     }
     return AttrsParsed;
   }
@@ -5175,7 +5175,7 @@ private:
     if (Tok.is(tok::colon)) {
       ParsedAttributes Attrs(AttrFactory);
       ParseHLSLAnnotations(Attrs, EndLoc, CouldBeBitField);
-      D.takeAttributes(Attrs);
+      D.takeAttributesAppending(Attrs);
       return true;
     }
     return false;
diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h
index c1a99a1..43a48c9 100644
--- a/clang/include/clang/Sema/DeclSpec.h
+++ b/clang/include/clang/Sema/DeclSpec.h
@@ -835,7 +835,7 @@ public:
   /// \endcode
   ///
   void addAttributes(const ParsedAttributesView &AL) {
-    Attrs.addAll(AL.begin(), AL.end());
+    Attrs.prepend(AL.begin(), AL.end());
   }
 
   bool hasAttributes() const { return !Attrs.empty(); }
@@ -843,8 +843,8 @@ public:
   ParsedAttributes &getAttributes() { return Attrs; }
   const ParsedAttributes &getAttributes() const { return Attrs; }
 
-  void takeAttributesFrom(ParsedAttributes &attrs) {
-    Attrs.takeAllFrom(attrs);
+  void takeAttributesAppendingingFrom(ParsedAttributes &attrs) {
+    Attrs.takeAllAppendingFrom(attrs);
   }
 
   /// Finish - This does final analysis of the declspec, issuing diagnostics for
@@ -2327,7 +2327,7 @@ public:
   void AddTypeInfo(const DeclaratorChunk &TI, ParsedAttributes &&attrs,
                    SourceLocation EndLoc) {
     DeclTypeInfo.push_back(TI);
-    DeclTypeInfo.back().getAttrs().addAll(attrs.begin(), attrs.end());
+    DeclTypeInfo.back().getAttrs().prepend(attrs.begin(), attrs.end());
     getAttributePool().takeAllFrom(attrs.getPool());
 
     if (!EndLoc.isInvalid())
@@ -2638,8 +2638,8 @@ public:
     return InventedTemplateParameterList;
   }
 
-  /// takeAttributes - Takes attributes from the given parsed-attributes
-  /// set and add them to this declarator.
+  /// takeAttributesAppending - Takes attributes from the given
+  /// ParsedAttributes set and add them to this declarator.
   ///
   /// These examples both add 3 attributes to "var":
   ///  short int var __attribute__((aligned(16),common,deprecated));
@@ -2647,8 +2647,8 @@ public:
   ///                                 __attribute__((common,deprecated));
   ///
   /// Also extends the range of the declarator.
-  void takeAttributes(ParsedAttributes &attrs) {
-    Attrs.takeAllFrom(attrs);
+  void takeAttributesAppending(ParsedAttributes &attrs) {
+    Attrs.takeAllAppendingFrom(attrs);
 
     if (attrs.Range.getEnd().isValid())
       SetRangeEnd(attrs.Range.getEnd());
diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h
index 2edee7e..5387f9f 100644
--- a/clang/include/clang/Sema/ParsedAttr.h
+++ b/clang/include/clang/Sema/ParsedAttr.h
@@ -856,19 +856,19 @@ public:
     friend class ParsedAttributesView;
   };
 
-  void addAll(iterator B, iterator E) {
+  void prepend(iterator B, iterator E) {
     AttrList.insert(AttrList.begin(), B.I, E.I);
   }
 
-  void addAll(const_iterator B, const_iterator E) {
+  void prepend(const_iterator B, const_iterator E) {
     AttrList.insert(AttrList.begin(), B.I, E.I);
   }
 
-  void addAllAtEnd(iterator B, iterator E) {
+  void append(iterator B, iterator E) {
     AttrList.insert(AttrList.end(), B.I, E.I);
   }
 
-  void addAllAtEnd(const_iterator B, const_iterator E) {
+  void append(const_iterator B, const_iterator E) {
     AttrList.insert(AttrList.end(), B.I, E.I);
   }
 
@@ -943,18 +943,18 @@ public:
 
   AttributePool &getPool() const { return pool; }
 
-  void takeAllFrom(ParsedAttributes &Other) {
+  void takeAllPrependingFrom(ParsedAttributes &Other) {
     assert(&Other != this &&
            "ParsedAttributes can't take attributes from itself");
-    addAll(Other.begin(), Other.end());
+    prepend(Other.begin(), Other.end());
     Other.clearListOnly();
     pool.takeAllFrom(Other.pool);
   }
 
-  void takeAllAtEndFrom(ParsedAttributes &Other) {
+  void takeAllAppendingFrom(ParsedAttributes &Other) {
     assert(&Other != this &&
            "ParsedAttributes can't take attributes from itself");
-    addAllAtEnd(Other.begin(), Other.end());
+    append(Other.begin(), Other.end());
     Other.clearListOnly();
     pool.takeAllFrom(Other.pool);
   }
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 16e35b0..8eb4d34e 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -301,11 +301,10 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
   // .gnu.hash needs symbols to be grouped by hash code whereas the MIPS
   // ABI requires a mapping between the GOT and the symbol table.
   // Android loader does not support .gnu.hash until API 23.
-  // Hexagon linker/loader does not support .gnu.hash
+  // Hexagon linker/loader does not support .gnu.hash.
+  // SUSE SLES 11 will stop being supported Mar 2028.
   if (!IsMips && !IsHexagon) {
-    if (Distro.IsOpenSUSE() || Distro == Distro::UbuntuLucid ||
-        Distro == Distro::UbuntuJaunty || Distro == Distro::UbuntuKarmic ||
-        (IsAndroid && Triple.isAndroidVersionLT(23)))
+    if (Distro.IsOpenSUSE() || (IsAndroid && Triple.isAndroidVersionLT(23)))
       ExtraOpts.push_back("--hash-style=both");
     else
       ExtraOpts.push_back("--hash-style=gnu");
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index d6cd7eb..e4b158e 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -1934,12 +1934,12 @@ Parser::DeclGroupPtrTy Parser::ParseSimpleDeclaration(
     bool RequireSemi, ForRangeInit *FRI, SourceLocation *DeclSpecStart) {
   // Need to retain these for diagnostics before we add them to the DeclSepc.
   ParsedAttributesView OriginalDeclSpecAttrs;
-  OriginalDeclSpecAttrs.addAll(DeclSpecAttrs.begin(), DeclSpecAttrs.end());
+  OriginalDeclSpecAttrs.prepend(DeclSpecAttrs.begin(), DeclSpecAttrs.end());
   OriginalDeclSpecAttrs.Range = DeclSpecAttrs.Range;
 
   // Parse the common declaration-specifiers piece.
   ParsingDeclSpec DS(*this);
-  DS.takeAttributesFrom(DeclSpecAttrs);
+  DS.takeAttributesAppendingingFrom(DeclSpecAttrs);
 
   ParsedTemplateInfo TemplateInfo;
   DeclSpecContext DSContext = getDeclSpecContextFromDeclaratorContext(Context);
@@ -2135,7 +2135,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
   // list. This ensures that we will not attempt to interpret them as statement
   // attributes higher up the callchain.
   ParsedAttributes LocalAttrs(AttrFactory);
-  LocalAttrs.takeAllFrom(Attrs);
+  LocalAttrs.takeAllPrependingFrom(Attrs);
   ParsingDeclarator D(*this, DS, LocalAttrs, Context);
   if (TemplateInfo.TemplateParams)
     D.setTemplateParameterLists(*TemplateInfo.TemplateParams);
@@ -3462,7 +3462,7 @@ void Parser::ParseDeclarationSpecifiers(
           PA.setInvalid();
         }
 
-        DS.takeAttributesFrom(attrs);
+        DS.takeAttributesAppendingingFrom(attrs);
       }
 
       // If this is not a declaration specifier token, we're done reading decl
@@ -3689,7 +3689,7 @@ void Parser::ParseDeclarationSpecifiers(
         if (ParseImplicitInt(DS, &SS, TemplateInfo, AS, DSContext, Attrs)) {
           if (!Attrs.empty()) {
             AttrsLastTime = true;
-            attrs.takeAllFrom(Attrs);
+            attrs.takeAllAppendingFrom(Attrs);
           }
           continue;
         }
@@ -3854,7 +3854,7 @@ void Parser::ParseDeclarationSpecifiers(
         if (ParseImplicitInt(DS, nullptr, TemplateInfo, AS, DSContext, Attrs)) {
           if (!Attrs.empty()) {
             AttrsLastTime = true;
-            attrs.takeAllFrom(Attrs);
+            attrs.takeAllAppendingFrom(Attrs);
           }
           continue;
         }
@@ -4463,7 +4463,7 @@ void Parser::ParseDeclarationSpecifiers(
       // take them over and handle them here.
       if (!Attributes.empty()) {
         AttrsLastTime = true;
-        attrs.takeAllFrom(Attributes);
+        attrs.takeAllAppendingFrom(Attributes);
       }
       continue;
     }
@@ -4830,7 +4830,7 @@ void Parser::ParseLexedCAttribute(LateParsedAttribute &LA, bool EnterScope,
     ConsumeAnyToken();
 
   if (OutAttrs) {
-    OutAttrs->takeAllFrom(Attrs);
+    OutAttrs->takeAllAppendingFrom(Attrs);
   }
 }
 
@@ -6122,7 +6122,7 @@ void Parser::ParseTypeQualifierListOpt(
       isAllowedCXX11AttributeSpecifier()) {
     ParsedAttributes Attrs(AttrFactory);
     ParseCXX11Attributes(Attrs);
-    DS.takeAttributesFrom(Attrs);
+    DS.takeAttributesAppendingingFrom(Attrs);
   }
 
   SourceLocation EndLoc;
@@ -7483,7 +7483,7 @@ void Parser::ParseParameterDeclarationClause(
       // Take them so that we only apply the attributes to the first parameter.
       // We have already started parsing the decl-specifier sequence, so don't
       // parse any parameter-declaration pieces that precede it.
-      ArgDeclSpecAttrs.takeAllFrom(FirstArgAttrs);
+      ArgDeclSpecAttrs.takeAllPrependingFrom(FirstArgAttrs);
     } else {
       // Parse any C++11 attributes.
       MaybeParseCXX11Attributes(ArgDeclAttrs);
@@ -7505,7 +7505,7 @@ void Parser::ParseParameterDeclarationClause(
                                DeclSpecContext::DSC_normal,
                                /*LateAttrs=*/nullptr, AllowImplicitTypename);
 
-    DS.takeAttributesFrom(ArgDeclSpecAttrs);
+    DS.takeAttributesAppendingingFrom(ArgDeclSpecAttrs);
 
     // Parse the declarator.  This is "PrototypeContext" or
     // "LambdaExprParameterContext", because we must accept either
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index 19f9412..b96968d 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -739,7 +739,7 @@ Parser::DeclGroupPtrTy Parser::ParseUsingDeclaration(
         << FixItHint::CreateInsertionFromRange(
                Tok.getLocation(), CharSourceRange::getTokenRange(Range))
         << FixItHint::CreateRemoval(Range);
-    Attrs.takeAllFrom(MisplacedAttrs);
+    Attrs.takeAllPrependingFrom(MisplacedAttrs);
   }
 
   // Maybe this is an alias-declaration.
@@ -787,7 +787,7 @@ Parser::DeclGroupPtrTy Parser::ParseUsingDeclaration(
     // Parse (optional) attributes.
     MaybeParseAttributes(PAKM_GNU | PAKM_CXX11, Attrs);
     DiagnoseCXX11AttributeExtension(Attrs);
-    Attrs.addAll(PrefixAttrs.begin(), PrefixAttrs.end());
+    Attrs.prepend(PrefixAttrs.begin(), PrefixAttrs.end());
 
     if (InvalidDeclarator)
       SkipUntil(tok::comma, tok::semi, StopBeforeMatch);
@@ -1948,7 +1948,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
 
       // Recover by adding misplaced attributes to the attribute list
       // of the class so they can be applied on the class later.
-      attrs.takeAllFrom(Attributes);
+      attrs.takeAllAppendingFrom(Attributes);
     }
   }
 
@@ -2842,7 +2842,7 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclaration(
   // decl-specifier-seq:
   // Parse the common declaration-specifiers piece.
   ParsingDeclSpec DS(*this, TemplateDiags);
-  DS.takeAttributesFrom(DeclSpecAttrs);
+  DS.takeAttributesAppendingingFrom(DeclSpecAttrs);
 
   if (MalformedTypeSpec)
     DS.SetTypeSpecError();
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 90191b0..74f87a8 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1244,7 +1244,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
         break;
     }
 
-    D.takeAttributes(Attributes);
+    D.takeAttributesAppending(Attributes);
   }
 
   MultiParseScope TemplateParamScope(*this);
diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp
index a64fb02..0b9f113 100644
--- a/clang/lib/Parse/ParseObjc.cpp
+++ b/clang/lib/Parse/ParseObjc.cpp
@@ -43,7 +43,7 @@ void Parser::MaybeSkipAttributes(tok::ObjCKeywordKind Kind) {
 Parser::DeclGroupPtrTy
 Parser::ParseObjCAtDirectives(ParsedAttributes &DeclAttrs,
                               ParsedAttributes &DeclSpecAttrs) {
-  DeclAttrs.takeAllFrom(DeclSpecAttrs);
+  DeclAttrs.takeAllPrependingFrom(DeclSpecAttrs);
 
   SourceLocation AtLoc = ConsumeToken(); // the "@"
 
@@ -1065,8 +1065,8 @@ void Parser::ParseObjCTypeQualifierList(ObjCDeclSpec &DS,
 
 /// Take all the decl attributes out of the given list and add
 /// them to the given attribute set.
-static void takeDeclAttributes(ParsedAttributesView &attrs,
-                               ParsedAttributesView &from) {
+static void takeDeclAttributesAppend(ParsedAttributesView &attrs,
+                                     ParsedAttributesView &from) {
   for (auto &AL : llvm::reverse(from)) {
     if (!AL.isUsedAsTypeAttr()) {
       from.remove(&AL);
@@ -1088,10 +1088,10 @@ static void takeDeclAttributes(ParsedAttributes &attrs,
   attrs.getPool().takeAllFrom(D.getDeclSpec().getAttributePool());
 
   // Now actually move the attributes over.
-  takeDeclAttributes(attrs, D.getMutableDeclSpec().getAttributes());
-  takeDeclAttributes(attrs, D.getAttributes());
+  takeDeclAttributesAppend(attrs, D.getMutableDeclSpec().getAttributes());
+  takeDeclAttributesAppend(attrs, D.getAttributes());
   for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i)
-    takeDeclAttributes(attrs, D.getTypeObject(i).getAttrs());
+    takeDeclAttributesAppend(attrs, D.getTypeObject(i).getAttrs());
 }
 
 ParsedType Parser::ParseObjCTypeName(ObjCDeclSpec &DS,
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index 2e7af12..9203898 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -718,7 +718,7 @@ StmtResult Parser::ParseLabeledStatement(ParsedAttributes &Attrs,
     // and followed by a semicolon, GCC will reject (it appears to parse the
     // attributes as part of a statement in that case). That looks like a bug.
     if (!getLangOpts().CPlusPlus || Tok.is(tok::semi))
-      Attrs.takeAllFrom(TempAttrs);
+      Attrs.takeAllAppendingFrom(TempAttrs);
     else {
       StmtVector Stmts;
       ParsedAttributes EmptyCXX11Attrs(AttrFactory);
@@ -2407,7 +2407,7 @@ StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts,
       Stmts, StmtCtx, TrailingElseLoc, Attrs, EmptyDeclSpecAttrs,
       PrecedingLabel);
 
-  Attrs.takeAllFrom(TempAttrs);
+  Attrs.takeAllPrependingFrom(TempAttrs);
 
   // Start of attribute range may already be set for some invalid input.
   // See PR46336.
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index 74aff0b..dbc7cbc 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -196,7 +196,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclarationAfterTemplate(
   ParsingDeclSpec DS(*this, &DiagsFromTParams);
   DS.SetRangeStart(DeclSpecAttrs.Range.getBegin());
   DS.SetRangeEnd(DeclSpecAttrs.Range.getEnd());
-  DS.takeAttributesFrom(DeclSpecAttrs);
+  DS.takeAttributesAppendingingFrom(DeclSpecAttrs);
 
   ParseDeclarationSpecifiers(DS, TemplateInfo, AS,
                              getDeclSpecContextFromDeclaratorContext(Context));
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index a17398b..bbff627 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -1083,7 +1083,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal(
          "expected uninitialised source range");
   DS.SetRangeStart(DeclSpecAttrs.Range.getBegin());
   DS.SetRangeEnd(DeclSpecAttrs.Range.getEnd());
-  DS.takeAttributesFrom(DeclSpecAttrs);
+  DS.takeAttributesAppendingingFrom(DeclSpecAttrs);
 
   ParsedTemplateInfo TemplateInfo;
   MaybeParseMicrosoftAttributes(DS.getAttributes());
@@ -1155,7 +1155,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal(
     }
 
     DS.abort();
-    DS.takeAttributesFrom(Attrs);
+    DS.takeAttributesAppendingingFrom(Attrs);
 
     const char *PrevSpec = nullptr;
     unsigned DiagID;
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index 8756ce5..184d31e 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -197,7 +197,7 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto,
         [&](DeclSpec::TQ TypeQual, StringRef PrintName, SourceLocation SL) {
           I.Fun.MethodQualifiers->SetTypeQual(TypeQual, SL);
         });
-    I.Fun.MethodQualifiers->getAttributes().takeAllFrom(attrs);
+    I.Fun.MethodQualifiers->getAttributes().takeAllPrependingFrom(attrs);
     I.Fun.MethodQualifiers->getAttributePool().takeAllFrom(attrs.getPool());
   }
 
diff --git a/clang/lib/Sema/ParsedAttr.cpp b/clang/lib/Sema/ParsedAttr.cpp
index 294f88e..2b5ad33 100644
--- a/clang/lib/Sema/ParsedAttr.cpp
+++ b/clang/lib/Sema/ParsedAttr.cpp
@@ -304,7 +304,7 @@ bool ParsedAttr::checkAtMostNumArgs(Sema &S, unsigned Num) const {
 void clang::takeAndConcatenateAttrs(ParsedAttributes &First,
                                     ParsedAttributes &&Second) {
 
-  First.takeAllAtEndFrom(Second);
+  First.takeAllAppendingFrom(Second);
 
   if (!First.Range.getBegin().isValid())
     First.Range.setBegin(Second.Range.getBegin());
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 6eaf7b9..0e83c20 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14637,7 +14637,7 @@ StmtResult Sema::ActOnCXXForRangeIdentifier(Scope *S, SourceLocation IdentLoc,
 
   Declarator D(DS, ParsedAttributesView::none(), DeclaratorContext::ForInit);
   D.SetIdentifier(Ident, IdentLoc);
-  D.takeAttributes(Attrs);
+  D.takeAttributesAppending(Attrs);
 
   D.AddTypeInfo(DeclaratorChunk::getReference(0, IdentLoc, /*lvalue*/ false),
                 IdentLoc);
diff --git a/clang/test/Sema/internal_linkage.c b/clang/test/Sema/internal_linkage.c
index a1bff73..4ea8599 100644
--- a/clang/test/Sema/internal_linkage.c
+++ b/clang/test/Sema/internal_linkage.c
@@ -20,7 +20,7 @@ struct __attribute__((internal_linkage)) S { // expected-warning{{'internal_link
 __attribute__((internal_linkage("foo"))) int g(void) {} // expected-error{{'internal_linkage' attribute takes no arguments}}
 
 int var6 [[clang::internal_linkage]];
-int var7 [[clang::internal_linkage]] __attribute__((common)); // expected-error{{'clang::internal_linkage' and 'common' attributes are not compatible}} \
+int var7 [[clang::internal_linkage]] __attribute__((common)); // expected-error{{'common' and 'clang::internal_linkage' attributes are not compatible}} \
                                                    // expected-note{{conflicting attribute is here}}
 __attribute__((common)) int var8 [[clang::internal_linkage]]; // expected-error{{'clang::internal_linkage' and 'common' attributes are not compatible}} \
                                                    // expected-note{{conflicting attribute is here}}
diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl
index 86435b7..6c3ee89 100644
--- a/clang/test/SemaOpenCL/address-spaces.cl
+++ b/clang/test/SemaOpenCL/address-spaces.cl
@@ -265,7 +265,8 @@ void func_multiple_addr2(void) {
   __attribute__((opencl_private)) private_int_t var5;  // expected-warning {{multiple identical address spaces specified for type}}
   __attribute__((opencl_private)) private_int_t *var6; // expected-warning {{multiple identical address spaces specified for type}}
 #if __OPENCL_CPP_VERSION__
-  __global int [[clang::opencl_private]] var7;         // expected-error {{multiple address spaces specified for type}}
+  __global int [[clang::opencl_private]] var7;         // expected-error {{multiple address spaces specified for type}} \
+                                                       // expected-error {{function scope variable cannot be declared in global address space}}
   __global int [[clang::opencl_private]] *var8;        // expected-error {{multiple address spaces specified for type}}
   private_int_t [[clang::opencl_private]] var9;        // expected-warning {{multiple identical address spaces specified for type}}
   private_int_t [[clang::opencl_private]] *var10;      // expected-warning {{multiple identical address spaces specified for type}}
diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst
index 6171629..dbe6948 100644
--- a/libcxx/docs/TestingLibcxx.rst
+++ b/libcxx/docs/TestingLibcxx.rst
@@ -419,10 +419,10 @@ writing tests easier. See `libc++-specific Lit Directives`_ for more information
      - ``// FILE_DEPENDENCIES: file, directory, /path/to/file, ...``
      - The paths given to the ``FILE_DEPENDENCIES`` directive can specify directories or specific files upon which a given test depend. For example, a test that requires some test
        input stored in a data file would use this libc++-specific Lit directive. When a test file contains the ``FILE_DEPENDENCIES`` directive, Lit will collect the named files and copy
-       them to the directory represented by the ``%T`` substitution before the test executes. The copy is performed from the directory represented by the ``%S`` substitution
+       them to the directory represented by the ``%{temp}`` substitution before the test executes. The copy is performed from the directory represented by the ``%S`` substitution
        (i.e. the source directory of the test being executed) which makes it possible to use relative paths to specify the location of dependency files. After Lit copies
-       all the dependent files to the directory specified by the ``%T`` substitution, that directory should contain *all* the necessary inputs to run. In other words,
-       it should be possible to copy the contents of the directory specified by the ``%T`` substitution to a remote host where the execution of the test will actually occur.
+       all the dependent files to the directory specified by the ``%{temp}`` substitution, that directory should contain *all* the necessary inputs to run. In other words,
+       it should be possible to copy the contents of the directory specified by the ``%{temp}`` substitution to a remote host where the execution of the test will actually occur.
    * - ``ADDITIONAL_COMPILE_FLAGS``
      - ``// ADDITIONAL_COMPILE_FLAGS: flag1 flag2 ...``
      - The additional compiler flags specified by a space-separated list to the ``ADDITIONAL_COMPILE_FLAGS`` libc++-specific Lit directive will be added to the end of the ``%{compile_flags}``
diff --git a/libcxx/include/string b/libcxx/include/string
index f5e05d8..8f80afbc 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1312,6 +1312,8 @@ public:
 #  if _LIBCPP_STD_VER >= 23
   template <class _Op>
   _LIBCPP_HIDE_FROM_ABI constexpr void resize_and_overwrite(size_type __n, _Op __op) {
+    using __result_type = decltype(std::move(__op)(data(), auto(__n)));
+    static_assert(__integer_like<__result_type>, "Operation return type must be integer-like");
     size_type __sz  = size();
     size_type __cap = capacity();
     if (__n > __cap)
diff --git a/libcxx/test/benchmarks/spec.gen.py b/libcxx/test/benchmarks/spec.gen.py
index c36dd0a..f355b2c 100644
--- a/libcxx/test/benchmarks/spec.gen.py
+++ b/libcxx/test/benchmarks/spec.gen.py
@@ -8,13 +8,13 @@
 
 # REQUIRES: enable-spec-benchmarks
 
-# RUN: mkdir -p %T
-# RUN: echo "%{cxx}" > %T/cxx.subs
-# RUN: echo "%{compile_flags}" > %T/compile_flags.subs
-# RUN: echo "%{flags}" > %T/flags.subs
-# RUN: echo "%{link_flags}" > %T/link_flags.subs
-# RUN: echo "%{spec_dir}" > %T/spec_dir.subs
-# RUN: %{python} %s %T
+# RUN: mkdir -p %{temp}
+# RUN: echo "%{cxx}" > %{temp}/cxx.subs
+# RUN: echo "%{compile_flags}" > %{temp}/compile_flags.subs
+# RUN: echo "%{flags}" > %{temp}/flags.subs
+# RUN: echo "%{link_flags}" > %{temp}/link_flags.subs
+# RUN: echo "%{spec_dir}" > %{temp}/spec_dir.subs
+# RUN: %{python} %s %{temp}
 # END.
 
 import json
@@ -66,18 +66,18 @@ spec_benchmarks &= no_fortran
 
 for benchmark in spec_benchmarks:
     print(f'#--- {benchmark}.sh.test')
-    print(f'RUN: rm -rf %T') # clean up any previous (potentially incomplete) run
-    print(f'RUN: mkdir %T')
-    print(f'RUN: cp {spec_config} %T/spec-config.cfg')
-    print(f'RUN: %{{spec_dir}}/bin/runcpu --config %T/spec-config.cfg --size train --output-root %T --rebuild {benchmark}')
-    print(f'RUN: rm -rf %T/benchspec') # remove the temporary directory, which can become quite large
+    print(f'RUN: rm -rf %{{temp}}') # clean up any previous (potentially incomplete) run
+    print(f'RUN: mkdir %{{temp}}')
+    print(f'RUN: cp {spec_config} %{{temp}}/spec-config.cfg')
+    print(f'RUN: %{{spec_dir}}/bin/runcpu --config %{{temp}}/spec-config.cfg --size train --output-root %{{temp}} --rebuild {benchmark}')
+    print(f'RUN: rm -rf %{{temp}}/benchspec') # remove the temporary directory, which can become quite large
 
     # The `runcpu` command above doesn't fail even if the benchmark fails to run. To determine failure, parse the CSV
     # results and ensure there are no compilation errors or runtime errors in the status row. Also print the logs and
     # fail if there are no CSV files at all, which implies a SPEC error.
-    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results --extract "Base Status" --keep-failed %T/result/*.train.csv > %T/status || ! cat %T/result/*.log')
-    print(f'RUN: ! grep -E "CE|RE" %T/status || ! cat %T/result/*.log')
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results --extract "Base Status" --keep-failed %{{temp}}/result/*.train.csv > %{{temp}}/status || ! cat %{{temp}}/result/*.log')
+    print(f'RUN: ! grep -E "CE|RE" %{{temp}}/status || ! cat %{{temp}}/result/*.log')
 
     # If there were no errors, parse the results into LNT-compatible format and print them.
-    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt')
-    print(f'RUN: cat %T/results.lnt')
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %{{temp}}/result/*.train.csv --output-format=lnt > %{{temp}}/results.lnt')
+    print(f'RUN: cat %{{temp}}/results.lnt')
diff --git a/libcxx/test/configs/apple-libc++-shared.cfg.in b/libcxx/test/configs/apple-libc++-shared.cfg.in
index 5504bfd..a361b2b 100644
--- a/libcxx/test/configs/apple-libc++-shared.cfg.in
+++ b/libcxx/test/configs/apple-libc++-shared.cfg.in
@@ -38,7 +38,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -lc++ %{apple-system-shims}'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --env DYLD_LIBRARY_PATH=%{lib-dir} -- '
+    '%{executor} --execdir %{temp} --env DYLD_LIBRARY_PATH=%{lib-dir} -- '
 ))
 
 config.stdlib = 'apple-libc++'
diff --git a/libcxx/test/configs/apple-libc++-system.cfg.in b/libcxx/test/configs/apple-libc++-system.cfg.in
index b59506f..e87f920 100644
--- a/libcxx/test/configs/apple-libc++-system.cfg.in
+++ b/libcxx/test/configs/apple-libc++-system.cfg.in
@@ -19,7 +19,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -lc++'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 config.stdlib = 'apple-libc++'
diff --git a/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in b/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
index b2669a7..f0782c2 100644
--- a/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
+++ b/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
@@ -30,7 +30,7 @@ config.executor = (
     ' --cpu cortex-m3')
 config.substitutions.append(('%{exec}',
     '%{executor}'
-    ' --execdir %T'
+    ' --execdir %{temp}'
 ))
 
 import os, site
diff --git a/libcxx/test/configs/ibm-libc++-shared.cfg.in b/libcxx/test/configs/ibm-libc++-shared.cfg.in
index 0f86e74..c24c729 100644
--- a/libcxx/test/configs/ibm-libc++-shared.cfg.in
+++ b/libcxx/test/configs/ibm-libc++-shared.cfg.in
@@ -18,7 +18,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -lc++ -lc++abi -latomic -Wl,-bbigtoc'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --env LIBPATH=%{lib-dir} -- '
+    '%{executor} --execdir %{temp} --env LIBPATH=%{lib-dir} -- '
 ))
 
 # LIBCXX-AIX-FIXME is the feature name used to XFAIL the
diff --git a/libcxx/test/configs/llvm-libc++-android.cfg.in b/libcxx/test/configs/llvm-libc++-android.cfg.in
index 9362c68..96c952d 100644
--- a/libcxx/test/configs/llvm-libc++-android.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-android.cfg.in
@@ -36,7 +36,7 @@ config.substitutions.append(('%{link_flags}',
 config.substitutions.append(('%{exec}',
     '%{executor}' +
     ' --job-limit-socket ' + libcxx.test.android.adb_job_limit_socket() +
-    ' --prepend-path-env LD_LIBRARY_PATH /data/local/tmp/libc++ --execdir %T -- '
+    ' --prepend-path-env LD_LIBRARY_PATH /data/local/tmp/libc++ --execdir %{temp} -- '
 ))
 
 libcxx.test.config.configure(
diff --git a/libcxx/test/configs/llvm-libc++-mingw.cfg.in b/libcxx/test/configs/llvm-libc++-mingw.cfg.in
index 01c4d58..4473171 100644
--- a/libcxx/test/configs/llvm-libc++-mingw.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-mingw.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -lc++'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{install-prefix}/bin -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{install-prefix}/bin -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in
index 5fa99bc..e6186a7 100644
--- a/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in
@@ -25,7 +25,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib -L %{lib-dir} -lc++ -l' + cxx_lib
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{install-prefix}/bin -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{install-prefix}/bin -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in b/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in
index 649bd31..1b4ddc2 100644
--- a/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in
@@ -12,7 +12,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -Wl,-rpath,%{lib-dir} -lc++ -lm'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/llvm-libc++-shared-no-vcruntime-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-shared-no-vcruntime-clangcl.cfg.in
index e95999d..476c5ca 100644
--- a/libcxx/test/configs/llvm-libc++-shared-no-vcruntime-clangcl.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-shared-no-vcruntime-clangcl.cfg.in
@@ -26,7 +26,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib -L %{lib-dir} -lc++ -l' + cxx_lib
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{install-prefix}/bin -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{install-prefix}/bin -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/llvm-libc++-shared.cfg.in b/libcxx/test/configs/llvm-libc++-shared.cfg.in
index 0c059f0..6d4a383 100644
--- a/libcxx/test/configs/llvm-libc++-shared.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-shared.cfg.in
@@ -13,7 +13,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -Wl,-rpath,%{lib-dir} -lc++'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in
index 4b6b3fc..2f2b420 100644
--- a/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in
@@ -25,7 +25,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib -L %{lib-dir} -llibc++ -l' + cxx_lib
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/llvm-libc++-static.cfg.in b/libcxx/test/configs/llvm-libc++-static.cfg.in
index 097cc4d..45a44c1 100644
--- a/libcxx/test/configs/llvm-libc++-static.cfg.in
+++ b/libcxx/test/configs/llvm-libc++-static.cfg.in
@@ -13,7 +13,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib-dir} -lc++ -lc++abi'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/stdlib-libstdc++.cfg.in b/libcxx/test/configs/stdlib-libstdc++.cfg.in
index 3ff0c54..1a4b47c 100644
--- a/libcxx/test/configs/stdlib-libstdc++.cfg.in
+++ b/libcxx/test/configs/stdlib-libstdc++.cfg.in
@@ -51,7 +51,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{libstdcxx-install-prefix}/lib/gcc/%{libstdcxx-version} -Wl,-rpath,%{libstdcxx-install-prefix}/lib/gcc/%{libstdcxx-version} -lstdc++'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxx/test/configs/stdlib-native.cfg.in b/libcxx/test/configs/stdlib-native.cfg.in
index 3e25c1e..b827155f 100644
--- a/libcxx/test/configs/stdlib-native.cfg.in
+++ b/libcxx/test/configs/stdlib-native.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{flags}',
 ))
 config.substitutions.append(('%{compile_flags}', '-I %{libcxx-dir}/test/support'))
 config.substitutions.append(('%{link_flags}', ''))
-config.substitutions.append(('%{exec}', '%{executor} --execdir %T -- '))
+config.substitutions.append(('%{exec}', '%{executor} --execdir %{temp} -- '))
 
 import os, site
 site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
diff --git a/libcxx/test/selftest/dsl/dsl.sh.py b/libcxx/test/selftest/dsl/dsl.sh.py
index 6d4406b..93f351f 100644
--- a/libcxx/test/selftest/dsl/dsl.sh.py
+++ b/libcxx/test/selftest/dsl/dsl.sh.py
@@ -12,7 +12,7 @@
 
 # Note: We prepend arguments with 'x' to avoid thinking there are too few
 #       arguments in case an argument is an empty string.
-# RUN: %{python} %s x%S x%T x%{substitutions}
+# RUN: %{python} %s x%S x%{temp} x%{substitutions}
 
 import base64
 import copy
diff --git a/libcxx/test/selftest/dsl/lit.local.cfg b/libcxx/test/selftest/dsl/lit.local.cfg
index 352719b..dc6887a 100644
--- a/libcxx/test/selftest/dsl/lit.local.cfg
+++ b/libcxx/test/selftest/dsl/lit.local.cfg
@@ -1,8 +1,8 @@
 # Since we try to pass substitutions as-is to some tests, we must "escape"
 # them in case they contain other substitutions. Otherwise, the substitutions
 # will be fully expanded when passed to the tests. For example, we want an
-# %{exec} substitution that contains `--execdir %T` to be passed as-is, without
-# substituting the directory. This way, the test itself can populate %T as it
+# %{exec} substitution that contains `--execdir %{temp}` to be passed as-is, without
+# substituting the directory. This way, the test itself can populate %{temp} as it
 # sees fit, and %{exec} will respect it.
 #
 # To solve this problem, we pickle the substitutions and base64 encode that
diff --git a/libcxx/test/selftest/file_dependencies/absolute-and-relative-paths.sh.cpp b/libcxx/test/selftest/file_dependencies/absolute-and-relative-paths.sh.cpp
index ac52f67..68283f6 100644
--- a/libcxx/test/selftest/file_dependencies/absolute-and-relative-paths.sh.cpp
+++ b/libcxx/test/selftest/file_dependencies/absolute-and-relative-paths.sh.cpp
@@ -9,7 +9,7 @@
 // Make sure that FILE_DEPENDENCIES work with relative AND absolute paths.
 
 // FILE_DEPENDENCIES: %S/a.txt
-// RUN: test -e %T/a.txt
+// RUN: test -e %{temp}/a.txt
 
 // FILE_DEPENDENCIES: dir/b.txt
-// RUN: test -e %T/b.txt
+// RUN: test -e %{temp}/b.txt
diff --git a/libcxx/test/selftest/file_dependencies/substitute-in-dependencies.sh.cpp b/libcxx/test/selftest/file_dependencies/substitute-in-dependencies.sh.cpp
index c63684c..59de718 100644
--- a/libcxx/test/selftest/file_dependencies/substitute-in-dependencies.sh.cpp
+++ b/libcxx/test/selftest/file_dependencies/substitute-in-dependencies.sh.cpp
@@ -9,4 +9,4 @@
 // Make sure that lit substitutions are expanded inside FILE_DEPENDENCIES lines.
 
 // FILE_DEPENDENCIES: %s
-// RUN: test -e %T/substitute-in-dependencies.sh.cpp
+// RUN: test -e %{temp}/substitute-in-dependencies.sh.cpp
diff --git a/libcxx/test/selftest/tmpdir-exists.sh.cpp b/libcxx/test/selftest/tmpdir-exists.sh.cpp
index 7f9e69d..4edd165 100644
--- a/libcxx/test/selftest/tmpdir-exists.sh.cpp
+++ b/libcxx/test/selftest/tmpdir-exists.sh.cpp
@@ -6,6 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Make sure that the directory represented by %T exists when we run the test.
+// Make sure that the directory represented by %{temp} exists when we run the test.
 
-// RUN: test -d %T
+// RUN: test -d %{temp}
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
index abd2848..e5c1963 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
@@ -21,6 +21,7 @@
 #include "make_string.h"
 #include "test_macros.h"
 #include "asan_testing.h"
+#include "type_algorithms.h"
 
 template <class S>
 constexpr void test_appending(std::size_t k, size_t N, size_t new_capacity) {
@@ -77,17 +78,30 @@ constexpr bool test() {
   return true;
 }
 
-void test_value_categories() {
+constexpr bool test_value_categories() {
   std::string s;
   s.resize_and_overwrite(10, [](char*&&, std::size_t&&) { return 0; });
   LIBCPP_ASSERT(is_string_asan_correct(s));
   s.resize_and_overwrite(10, [](char* const&, const std::size_t&) { return 0; });
   LIBCPP_ASSERT(is_string_asan_correct(s));
   struct RefQualified {
-    int operator()(char*, std::size_t) && { return 0; }
+    constexpr int operator()(char*, std::size_t) && { return 0; }
   };
   s.resize_and_overwrite(10, RefQualified{});
   LIBCPP_ASSERT(is_string_asan_correct(s));
+  return true;
+}
+
+constexpr bool test_integer_like_return_types() {
+  types::for_each(types::integer_types(), []<typename IntegerType> {
+    std::string s;
+    s.resize_and_overwrite(10, [](char* p, std::size_t n) -> IntegerType {
+      std::fill(p, p + n, 'f');
+      return n;
+    });
+    assert(s.size() == 10);
+  });
+  return true;
 }
 
 int main(int, char**) {
@@ -105,5 +119,12 @@ int main(int, char**) {
   test<std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>>>();
   static_assert(test<std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>>>());
 #endif
+
+  test_value_categories();
+  test_integer_like_return_types();
+
+  static_assert(test_value_categories());
+  static_assert(test_integer_like_return_types());
+
   return 0;
 }
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.verify.cpp b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.verify.cpp
new file mode 100644
index 0000000..323f49b
--- /dev/null
+++ b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.verify.cpp
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: std-at-least-c++23
+
+// <string>
+
+// template<class Operation>
+// void resize_and_overwrite(size_type n, Operation op)
+
+// Verify that the operation's return type must be integer-like
+
+#include <string>
+
+void test_bool_return_type() {
+  std::string s;
+  s.resize_and_overwrite(10, [](char*, std::size_t) {
+    return true; // expected-error-re@*:* {{{{(static_assertion|static assertion)}}{{.*}}integer-like}}
+  });
+}
+
+void test_pointer_return_type() {
+  std::string s;
+  s.resize_and_overwrite(10, [](char* p, std::size_t) {
+    return p; // expected-error-re@*:* {{{{(static_assertion|static assertion)}}{{.*}}integer-like}}
+              // expected-error@*:* {{cannot initialize}}
+  });
+}
+
+void test_float_return_type() {
+  std::string s;
+  s.resize_and_overwrite(10, [](char*, std::size_t) {
+    return 5.0f; // expected-error-re@*:* {{{{(static_assertion|static assertion)}}{{.*}}integer-like}}
+  });
+}
diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index 2d3a72c..3fb30d8 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -111,8 +111,8 @@ def _makeConfigTest(config):
             os.makedirs(supportDir)
 
     # Create a dummy test suite and single dummy test inside it. As part of
-    # the Lit configuration, automatically do the equivalent of 'mkdir %T'
-    # and 'rm -r %T' to avoid cluttering the build directory.
+    # the Lit configuration, automatically do the equivalent of 'mkdir %{temp}'
+    # and 'rm -r %{temp}' to avoid cluttering the build directory.
     suite = lit.Test.TestSuite("__config__", sourceRoot, execRoot, config)
     tmp = tempfile.NamedTemporaryFile(dir=sourceRoot, delete=False, suffix=".cpp")
     tmp.close()
diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py
index c9dffd1..3fcd250 100644
--- a/libcxx/utils/libcxx/test/format.py
+++ b/libcxx/utils/libcxx/test/format.py
@@ -17,10 +17,10 @@ LIBCXX_UTILS = os.path.dirname(os.path.dirname(os.path.dirname(THIS_FILE)))
 
 def _getTempPaths(test):
     """
-    Return the values to use for the %T and %t substitutions, respectively.
+    Return the values to use for the %{temp} and %t substitutions, respectively.
 
     The difference between this and Lit's default behavior is that we guarantee
-    that %T is a path unique to the test being run.
+    that %{temp} is a path unique to the test being run.
     """
     tmpDir, _ = lit.TestRunner.getTempPaths(test)
     _, testName = os.path.split(test.getExecPath())
@@ -92,7 +92,7 @@ def parseScript(test, preamble):
     #       errors, which doesn't make sense for clang-verify tests because we may want to check
     #       for specific warning diagnostics.
     _checkBaseSubstitutions(substitutions)
-    substitutions.append(("%T", tmpDir))
+    substitutions.append(("%{temp}", tmpDir))
     substitutions.append(
         ("%{build}", "%{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe")
     )
@@ -150,7 +150,7 @@ def parseScript(test, preamble):
     # that file to the execution directory. Execute the copy from %S to allow
     # relative paths from the test directory.
     for dep in fileDependencies:
-        script += ["%dbg(SETUP) cd %S && cp {} %T".format(dep)]
+        script += ["%dbg(SETUP) cd %S && cp {} %{{temp}}".format(dep)]
     script += preamble
     script += scriptInTest
 
@@ -178,11 +178,11 @@ def parseScript(test, preamble):
                 "%dbg(MODULE std.compat) %{cxx} %{flags} "
                 f"{compileFlags} "
                 "-Wno-reserved-module-identifier -Wno-reserved-user-defined-literal "
-                "-fmodule-file=std=%T/std.pcm " # The std.compat module imports std.
-                "--precompile -o %T/std.compat.pcm -c %{module-dir}/std.compat.cppm",
+                "-fmodule-file=std=%{temp}/std.pcm " # The std.compat module imports std.
+                "--precompile -o %{temp}/std.compat.pcm -c %{module-dir}/std.compat.cppm",
             )
             moduleCompileFlags.extend(
-                ["-fmodule-file=std.compat=%T/std.compat.pcm", "%T/std.compat.pcm"]
+                ["-fmodule-file=std.compat=%{temp}/std.compat.pcm", "%{temp}/std.compat.pcm"]
             )
 
         # Make sure the std module is built before std.compat. Libc++'s
@@ -195,9 +195,9 @@ def parseScript(test, preamble):
             "%dbg(MODULE std) %{cxx} %{flags} "
             f"{compileFlags} "
             "-Wno-reserved-module-identifier -Wno-reserved-user-defined-literal "
-            "--precompile -o %T/std.pcm -c %{module-dir}/std.cppm",
+            "--precompile -o %{temp}/std.pcm -c %{module-dir}/std.cppm",
         )
-        moduleCompileFlags.extend(["-fmodule-file=std=%T/std.pcm", "%T/std.pcm"])
+        moduleCompileFlags.extend(["-fmodule-file=std=%{temp}/std.pcm", "%{temp}/std.pcm"])
 
         # Add compile flags required for the modules.
         substitutions = config._appendToSubstitution(
@@ -261,6 +261,10 @@ class CxxStandardLibraryTest(lit.formats.FileBasedTest):
         %{run}
             Equivalent to `%{exec} %t.exe`. This is intended to be used
             in conjunction with the %{build} substitution.
+
+        %{temp}
+            This substitution expands to a non-existent temporary path unique to the test.
+            It is typically used to create a temporary directory.
     """
 
     def getTestsForPath(self, testSuite, pathInSuite, litConfig, localConfig):
@@ -355,9 +359,9 @@ class CxxStandardLibraryTest(lit.formats.FileBasedTest):
                 "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} %{benchmark_flags} %{link_flags} -o %t.exe",
             ]
             if "enable-benchmarks=run" in test.config.available_features:
-                steps += ["%dbg(EXECUTED AS) %{exec} %t.exe --benchmark_out=%T/benchmark-result.json --benchmark_out_format=json"]
+                steps += ["%dbg(EXECUTED AS) %{exec} %t.exe --benchmark_out=%{temp}/benchmark-result.json --benchmark_out_format=json"]
                 parse_results = os.path.join(LIBCXX_UTILS, 'parse-google-benchmark-results')
-                steps += [f"{parse_results} %T/benchmark-result.json --output-format=lnt > %T/results.lnt"]
+                steps += [f"{parse_results} %{temp}/benchmark-result.json --output-format=lnt > %{temp}/results.lnt"]
             return self._executeShTest(test, litConfig, steps)
         elif re.search('[.]gen[.][^.]+$', filename): # This only happens when a generator test is not supported
             return self._executeShTest(test, litConfig, [])
diff --git a/libcxx/utils/ssh.py b/libcxx/utils/ssh.py
index ec16efc..77e79ce 100755
--- a/libcxx/utils/ssh.py
+++ b/libcxx/utils/ssh.py
@@ -57,7 +57,7 @@ def main():
         return subprocess.run(command, *args_, **kwargs)
 
     # Create a temporary directory where the test will be run.
-    # That is effectively the value of %T on the remote host.
+    # That is effectively the value of %{temp} on the remote host.
     tmp = runCommand(
         ssh("mktemp -d {}/libcxx.XXXXXXXXXX".format(args.tempdir)),
         universal_newlines=True,
diff --git a/libcxx/utils/test-at-commit b/libcxx/utils/test-at-commit
index d62643d..f20bf5f 100755
--- a/libcxx/utils/test-at-commit
+++ b/libcxx/utils/test-at-commit
@@ -22,7 +22,7 @@ config.substitutions.append(('%{{flags}}',
 ))
 config.substitutions.append(('%{{compile_flags}}', '-nostdinc++ -I {INSTALL_ROOT}/include/c++/v1 -I %{{libcxx-dir}}/test/support'))
 config.substitutions.append(('%{{link_flags}}', '-nostdlib++ -L {INSTALL_ROOT}/lib -Wl,-rpath,{INSTALL_ROOT}/lib -lc++'))
-config.substitutions.append(('%{{exec}}', '%{{executor}} --execdir %T -- '))
+config.substitutions.append(('%{{exec}}', '%{{executor}} --execdir %{{temp}} -- '))
 
 import os, site
 site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
diff --git a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in
index 537fe82..0da074b 100644
--- a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in
+++ b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in
@@ -36,7 +36,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lc++ %{apple-system-shims}'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --env DYLD_LIBRARY_PATH=%{lib} -- '
+    '%{executor} --execdir %{temp} --env DYLD_LIBRARY_PATH=%{lib} -- '
 ))
 
 config.stdlib = 'apple-libc++'
diff --git a/libcxxabi/test/configs/apple-libc++abi-system.cfg.in b/libcxxabi/test/configs/apple-libc++abi-system.cfg.in
index 1e80eee..0e62e03 100644
--- a/libcxxabi/test/configs/apple-libc++abi-system.cfg.in
+++ b/libcxxabi/test/configs/apple-libc++abi-system.cfg.in
@@ -20,7 +20,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lc++'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 config.stdlib = 'apple-libc++'
diff --git a/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in b/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in
index 0594ba4..e61efbb 100644
--- a/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in
+++ b/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in
@@ -25,7 +25,7 @@ config.executor = (
     ' --cpu cortex-m3')
 config.substitutions.append(('%{exec}',
     '%{executor}'
-    ' --execdir %T'
+    ' --execdir %{temp}'
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in
index bd6c1fb..238cfdb 100644
--- a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in
+++ b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in
@@ -19,7 +19,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lc++ -lc++abi -Wl,-bbigtoc'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --env LIBPATH=%{lib} -- '
+    '%{executor} --execdir %{temp} --env LIBPATH=%{lib} -- '
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/llvm-libc++abi-android.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-android.cfg.in
index bc58446..e8bc3c9 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-android.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-android.cfg.in
@@ -29,7 +29,7 @@ config.substitutions.append(('%{link_flags}',
 config.substitutions.append(('%{exec}',
     '%{executor}' +
     ' --job-limit-socket ' + libcxx.test.android.adb_job_limit_socket() +
-    ' --prepend-path-env LD_LIBRARY_PATH /data/local/tmp/libc++ --execdir %T -- '
+    ' --prepend-path-env LD_LIBRARY_PATH /data/local/tmp/libc++ --execdir %{temp} -- '
 ))
 
 libcxx.test.config.configure(
diff --git a/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in
index c6fa430..0aecc44 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in
@@ -13,7 +13,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -Wl,-rpath,%{lib} -lc++ -pthread'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in
index 0d8b7bd..14f7ab8 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lc++'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{install-prefix}/bin -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{install-prefix}/bin -- '
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in
index cfdfc0f..01cd2b2 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib -L %{lib} -lc++ -lc++abi -lmsvcrt -lmsvcprt -loldnames'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{lib} -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{lib} -- '
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in
index 6b69205..e5f18d5 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in
@@ -13,7 +13,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -Wl,-rpath,%{lib} -lc++ -lc++abi -pthread'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in
index ba67c8b..9f48389 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib -L %{lib} -llibc++ -llibc++abi -lmsvcrt -lmsvcprt -loldnames'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{lib} -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{lib} -- '
 ))
 
 import os, site
diff --git a/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in
index 352e2c3..3fca7b3 100644
--- a/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in
+++ b/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in
@@ -13,7 +13,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lc++ -lc++abi -pthread'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libunwind/test/configs/apple-libunwind-system.cfg.in b/libunwind/test/configs/apple-libunwind-system.cfg.in
index e5a7c98..252448a 100644
--- a/libunwind/test/configs/apple-libunwind-system.cfg.in
+++ b/libunwind/test/configs/apple-libunwind-system.cfg.in
@@ -19,7 +19,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lc++ -lunwind'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 config.stdlib = 'apple-libc++'
diff --git a/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in b/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in
index fc54900..6ffdd70 100644
--- a/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in
+++ b/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in
@@ -25,7 +25,7 @@ config.executor = (
     ' --cpu cortex-m3')
 config.substitutions.append(('%{exec}',
     '%{executor}'
-    ' --execdir %T'
+    ' --execdir %{temp}'
 ))
 
 import os, site
diff --git a/libunwind/test/configs/ibm-libunwind-shared.cfg.in b/libunwind/test/configs/ibm-libunwind-shared.cfg.in
index 2221e0c..99f4a90 100644
--- a/libunwind/test/configs/ibm-libunwind-shared.cfg.in
+++ b/libunwind/test/configs/ibm-libunwind-shared.cfg.in
@@ -17,7 +17,7 @@ config.substitutions.append(('%{link_flags}',
     '-nostdlib++ -L %{lib} -lunwind -ldl -Wl,-bbigtoc'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --env LIBPATH=%{lib} -- '
+    '%{executor} --execdir %{temp} --env LIBPATH=%{lib} -- '
 ))
 
 import os, site
diff --git a/libunwind/test/configs/llvm-libunwind-merged.cfg.in b/libunwind/test/configs/llvm-libunwind-merged.cfg.in
index fafe129..34950f6 100644
--- a/libunwind/test/configs/llvm-libunwind-merged.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-merged.cfg.in
@@ -35,7 +35,7 @@ config.substitutions.append(('%{link_flags}',
     '-L %{{lib}} -Wl,-rpath,%{{lib}} -lc++ {}'.format(' '.join(link_flags))
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libunwind/test/configs/llvm-libunwind-shared-mingw.cfg.in b/libunwind/test/configs/llvm-libunwind-shared-mingw.cfg.in
index b29d83f..1e77638 100644
--- a/libunwind/test/configs/llvm-libunwind-shared-mingw.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-shared-mingw.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{link_flags}',
     '-L %{lib} -lunwind'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{install-prefix}/bin -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{install-prefix}/bin -- '
 ))
 
 import os, site
diff --git a/libunwind/test/configs/llvm-libunwind-shared.cfg.in b/libunwind/test/configs/llvm-libunwind-shared.cfg.in
index f3e4092..61d6b61 100644
--- a/libunwind/test/configs/llvm-libunwind-shared.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-shared.cfg.in
@@ -34,7 +34,7 @@ config.substitutions.append(('%{link_flags}',
     '-L %{{lib}} -Wl,-rpath,%{{lib}} -lunwind {}'.format(' '.join(link_flags))
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/libunwind/test/configs/llvm-libunwind-static-mingw.cfg.in b/libunwind/test/configs/llvm-libunwind-static-mingw.cfg.in
index 437c53b..37d20a7 100644
--- a/libunwind/test/configs/llvm-libunwind-static-mingw.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-static-mingw.cfg.in
@@ -11,7 +11,7 @@ config.substitutions.append(('%{link_flags}',
     '-L %{lib} -lunwind'
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T --prepend_env PATH=%{lib} -- '
+    '%{executor} --execdir %{temp} --prepend_env PATH=%{lib} -- '
 ))
 
 import os, site
diff --git a/libunwind/test/configs/llvm-libunwind-static.cfg.in b/libunwind/test/configs/llvm-libunwind-static.cfg.in
index a3a65ae..194fa4f 100644
--- a/libunwind/test/configs/llvm-libunwind-static.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-static.cfg.in
@@ -37,7 +37,7 @@ config.substitutions.append(('%{link_flags}',
     '%{{lib}}/libunwind.a {}'.format(' '.join(link_flags))
 ))
 config.substitutions.append(('%{exec}',
-    '%{executor} --execdir %T -- '
+    '%{executor} --execdir %{temp} -- '
 ))
 
 import os, site
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
index 768dd6f..ebba4d1 100644
--- a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
@@ -12,20 +12,6 @@ from lldbsuite.test import lldbutil
 class TestCortexMExceptionUnwind(TestBase):
     NO_DEBUG_INFO_TESTCASE = True
 
-    # on the lldb-remote-linux-ubuntu CI, the binary.json's triple of
-    # armv7m-apple is not being set in the Target triple, and we're
-    # picking the wrong ABI plugin, ABISysV_arm.
-    # ABISysV_arm::CreateDefaultUnwindPlan() doesn't have a way to detect
-    # arm/thumb for a stack frame, or even the Target's triple for a
-    # Cortex-M part that is always thumb.  It hardcodes r11 as the frame
-    # pointer register, which is correct for arm code but not thumb.
-    # It is never correct # on a Cortex-M target.
-    # The Darwin ABIMacOSX_arm diverges from AAPCS and always uses r7 for
-    # the frame pointer -- the thumb convention -- whether executing arm or
-    # thumb.  So its CreateDefaultUnwindPlan picks the correct register for
-    # the frame pointer, and we can walk the stack.
-    # ABISysV_arm::CreateDefaultUnwindPlan will only get one frame and
-    # not be able to continue.
     @skipIfRemote
     def test_no_fpu(self):
         """Test that we can backtrace correctly through an ARM Cortex-M Exception return stack"""
@@ -59,10 +45,9 @@ class TestCortexMExceptionUnwind(TestBase):
         # frames above that.  The topmost two stack frames
         # were not interesting for this test, so I didn't
         # create symbols for them.
-        self.assertEqual(thread.GetNumFrames(), 6)
+        self.assertEqual(thread.GetNumFrames(), 3)
         stackframe_names = [
             "exception_catcher",
-            "exception_catcher",
             "exception_thrower",
             "main",
         ]
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml b/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml
index 9ce5ff4..0b4e1f8 100644
--- a/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml
@@ -2,8 +2,8 @@ cpu: armv7m
 threads:
   - regsets:
       - flavor: gpr
-        registers: [{name: sp, value: 0x2000fe70}, {name: r7, value: 0x2000fe80}, 
-                    {name: pc, value: 0x0020392c}, {name: lr, value: 0x0020392d}]
+        registers: [{name: sp, value: 0x2000fe88}, {name: r7, value: 0x2000fe88}, 
+                    {name: pc, value: 0x00203916}, {name: lr, value: 0x0020392d}]
 memory-regions:
   # stack memory fetched via 
   # (lldb) p/x $sp
@@ -14,7 +14,7 @@ memory-regions:
       0x0000002a, 0x20010e58, 0x00203923, 0x00000001,
       0x2000fe88, 0x00203911, 0x2000ffdc, 0xfffffff9,
       0x00000102, 0x00000002, 0x000003f0, 0x0000002a,
-      0x20012620, 0x00203215, 0x00203366, 0x81000200,
+      0x20012620, 0x00203215, 0x00202a92, 0x81000200,
       0x00203215, 0x200128b0, 0x0024928d, 0x2000fecc,
       0x002491ed, 0x20010e58, 0x20010e4c, 0x2000ffa0,
       0x200107a0, 0x0000003c, 0x200116e8, 0x200108b0,
@@ -62,3 +62,26 @@ memory-regions:
       0x98, 0xae, 0x28, 0x00
     ]
 
+  # exception_thrower
+  # (lldb) disass -b -c 12 -n exception_thrower
+  # 0x202a88 <+0>:  0xb5f0       push   {r4, r5, r6, r7, lr}
+  # 0x202a8a <+2>:  0xaf03       add    r7, sp, #0xc
+  # 0x202a8c <+4>:  0xe92d0f00   push.w {r8, r9, r10, r11}
+  # 0x202a90 <+8>:  0xb0c3       sub    sp, #0x10c
+  # 0x202a92 <+10>: 0xf7ffffd9   bl     0x202a48                 
+  - addr: 0x202a88
+    UInt8: [
+      0xf0, 0xb5, 0x03, 0xaf, 0x2d, 0xe9, 0x00, 0x0f,
+      0xc3, 0xb0, 0xff, 0xf7, 0xd9, 0xff, 0xff, 0xf7
+    ]
+
+  # main:
+  # 0x202a7e <+0>: push   {r7, lr}
+  # 0x202a80 <+2>: mov    r7, sp
+  # 0x202a82 <+4>: bl     0x202a88                  ; exception_thrower
+  # 0x202a86 <+8>: nop
+  - addr: 0x202a7e
+    UInt8: [
+      0x80, 0xb5, 0x6f, 0x46, 0x00, 0xf0, 0x01, 0xf8,
+      0x00, 0xbf
+    ]
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json b/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json
index 8fcd530..0de0169 100644
--- a/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json
@@ -1,5 +1,5 @@
 {
-  "triple": "armv7m-apple",
+  "triple": "armv7m--",
   "uuid": "2D157DBA-53C9-3AC7-B5A1-9D336EC831CB",
   "type": "executable",
   "sections": [
@@ -28,13 +28,13 @@
     {
       "name": "exception_catcher",
       "type": "code",
-      "size": 44,
+      "size": 32,
       "address": 2111760
     },
     {
       "name": "exception_thrower",
       "type": "code",
-      "size": 2652,
+      "size": 16,
       "address": 2108040
     }
   ]
diff --git a/lldb/unittests/Host/JSONTransportTest.cpp b/lldb/unittests/Host/JSONTransportTest.cpp
index 7db6508..54f1372 100644
--- a/lldb/unittests/Host/JSONTransportTest.cpp
+++ b/lldb/unittests/Host/JSONTransportTest.cpp
@@ -49,85 +49,85 @@ namespace {
 
 namespace test_protocol {
 
-struct Req {
+struct Request {
   int id = 0;
   std::string name;
   std::optional<json::Value> params;
 };
-json::Value toJSON(const Req &T) {
+json::Value toJSON(const Request &T) {
   return json::Object{{"name", T.name}, {"id", T.id}, {"params", T.params}};
 }
-bool fromJSON(const json::Value &V, Req &T, json::Path P) {
+bool fromJSON(const json::Value &V, Request &T, json::Path P) {
   json::ObjectMapper O(V, P);
   return O && O.map("name", T.name) && O.map("id", T.id) &&
          O.map("params", T.params);
 }
-bool operator==(const Req &a, const Req &b) {
+bool operator==(const Request &a, const Request &b) {
   return a.name == b.name && a.id == b.id && a.params == b.params;
 }
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Req &V) {
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Request &V) {
   OS << toJSON(V);
   return OS;
 }
-void PrintTo(const Req &message, std::ostream *os) {
+void PrintTo(const Request &message, std::ostream *os) {
   std::string O;
   llvm::raw_string_ostream OS(O);
   OS << message;
   *os << O;
 }
 
-struct Resp {
+struct Response {
   int id = 0;
   int errorCode = 0;
   std::optional<json::Value> result;
 };
-json::Value toJSON(const Resp &T) {
+json::Value toJSON(const Response &T) {
   return json::Object{
       {"id", T.id}, {"errorCode", T.errorCode}, {"result", T.result}};
 }
-bool fromJSON(const json::Value &V, Resp &T, json::Path P) {
+bool fromJSON(const json::Value &V, Response &T, json::Path P) {
   json::ObjectMapper O(V, P);
   return O && O.map("id", T.id) && O.mapOptional("errorCode", T.errorCode) &&
          O.map("result", T.result);
 }
-bool operator==(const Resp &a, const Resp &b) {
+bool operator==(const Response &a, const Response &b) {
   return a.id == b.id && a.errorCode == b.errorCode && a.result == b.result;
 }
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Resp &V) {
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Response &V) {
   OS << toJSON(V);
   return OS;
 }
-void PrintTo(const Resp &message, std::ostream *os) {
+void PrintTo(const Response &message, std::ostream *os) {
   std::string O;
   llvm::raw_string_ostream OS(O);
   OS << message;
   *os << O;
 }
 
-struct Evt {
+struct Event {
   std::string name;
   std::optional<json::Value> params;
 };
-json::Value toJSON(const Evt &T) {
+json::Value toJSON(const Event &T) {
   return json::Object{{"name", T.name}, {"params", T.params}};
 }
-bool fromJSON(const json::Value &V, Evt &T, json::Path P) {
+bool fromJSON(const json::Value &V, Event &T, json::Path P) {
   json::ObjectMapper O(V, P);
   return O && O.map("name", T.name) && O.map("params", T.params);
 }
-bool operator==(const Evt &a, const Evt &b) { return a.name == b.name; }
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Evt &V) {
+bool operator==(const Event &a, const Event &b) { return a.name == b.name; }
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Event &V) {
   OS << toJSON(V);
   return OS;
 }
-void PrintTo(const Evt &message, std::ostream *os) {
+void PrintTo(const Event &message, std::ostream *os) {
   std::string O;
   llvm::raw_string_ostream OS(O);
   OS << message;
   *os << O;
 }
 
-using Message = std::variant<Req, Resp, Evt>;
+using Message = std::variant<Request, Response, Event>;
 json::Value toJSON(const Message &msg) {
   return std::visit([](const auto &msg) { return toJSON(msg); }, msg);
 }
@@ -139,7 +139,7 @@ bool fromJSON(const json::Value &V, Message &msg, json::Path P) {
   }
 
   if (O->find("id") == O->end()) {
-    Evt E;
+    Event E;
     if (!fromJSON(V, E, P))
       return false;
 
@@ -148,7 +148,7 @@ bool fromJSON(const json::Value &V, Message &msg, json::Path P) {
   }
 
   if (O->get("name")) {
-    Req R;
+    Request R;
     if (!fromJSON(V, R, P))
       return false;
 
@@ -156,7 +156,7 @@ bool fromJSON(const json::Value &V, Message &msg, json::Path P) {
     return true;
   }
 
-  Resp R;
+  Response R;
   if (!fromJSON(V, R, P))
     return false;
 
@@ -187,9 +187,9 @@ bool fromJSON(const json::Value &V, MyFnResult &T, json::Path P) {
 
 struct ProtoDesc {
   using Id = int;
-  using Req = Req;
-  using Resp = Resp;
-  using Evt = Evt;
+  using Req = Request;
+  using Resp = Response;
+  using Evt = Event;
 
   static inline Id InitialId() { return 0; }
   static inline Req Make(Id id, llvm::StringRef method,
@@ -393,8 +393,8 @@ TEST_F(HTTPDelimitedJSONTransportTest, MalformedRequests) {
 }
 
 TEST_F(HTTPDelimitedJSONTransportTest, Read) {
-  Write(Req{6, "foo", std::nullopt});
-  EXPECT_CALL(message_handler, Received(Req{6, "foo", std::nullopt}));
+  Write(Request{6, "foo", std::nullopt});
+  EXPECT_CALL(message_handler, Received(Request{6, "foo", std::nullopt}));
   ASSERT_THAT_ERROR(Run(), Succeeded());
 }
 
@@ -402,17 +402,18 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadMultipleMessagesInSingleWrite) {
   InSequence seq;
   Write(
       Message{
-          Req{6, "one", std::nullopt},
+          Request{6, "one", std::nullopt},
       },
       Message{
-          Evt{"two", std::nullopt},
+          test_protocol::Event{"two", std::nullopt},
       },
       Message{
-          Resp{2, 0, std::nullopt},
+          Response{2, 0, std::nullopt},
       });
-  EXPECT_CALL(message_handler, Received(Req{6, "one", std::nullopt}));
-  EXPECT_CALL(message_handler, Received(Evt{"two", std::nullopt}));
-  EXPECT_CALL(message_handler, Received(Resp{2, 0, std::nullopt}));
+  EXPECT_CALL(message_handler, Received(Request{6, "one", std::nullopt}));
+  EXPECT_CALL(message_handler,
+              Received(test_protocol::Event{"two", std::nullopt}));
+  EXPECT_CALL(message_handler, Received(Response{2, 0, std::nullopt}));
   ASSERT_THAT_ERROR(Run(), Succeeded());
 }
 
@@ -420,18 +421,18 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadAcrossMultipleChunks) {
   std::string long_str = std::string(
       HTTPDelimitedJSONTransport<test_protocol::ProtoDesc>::kReadBufferSize * 2,
       'x');
-  Write(Req{5, long_str, std::nullopt});
-  EXPECT_CALL(message_handler, Received(Req{5, long_str, std::nullopt}));
+  Write(Request{5, long_str, std::nullopt});
+  EXPECT_CALL(message_handler, Received(Request{5, long_str, std::nullopt}));
   ASSERT_THAT_ERROR(Run(), Succeeded());
 }
 
 TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) {
-  std::string message = Encode(Req{5, "foo", std::nullopt});
+  std::string message = Encode(Request{5, "foo", std::nullopt});
   auto split_at = message.size() / 2;
   std::string part1 = message.substr(0, split_at);
   std::string part2 = message.substr(split_at);
 
-  EXPECT_CALL(message_handler, Received(Req{5, "foo", std::nullopt}));
+  EXPECT_CALL(message_handler, Received(Request{5, "foo", std::nullopt}));
 
   ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded());
   loop.AddPendingCallback(
@@ -443,12 +444,12 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) {
 }
 
 TEST_F(HTTPDelimitedJSONTransportTest, ReadWithZeroByteWrites) {
-  std::string message = Encode(Req{6, "foo", std::nullopt});
+  std::string message = Encode(Request{6, "foo", std::nullopt});
   auto split_at = message.size() / 2;
   std::string part1 = message.substr(0, split_at);
   std::string part2 = message.substr(split_at);
 
-  EXPECT_CALL(message_handler, Received(Req{6, "foo", std::nullopt}));
+  EXPECT_CALL(message_handler, Received(Request{6, "foo", std::nullopt}));
 
   ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded());
 
@@ -500,9 +501,11 @@ TEST_F(HTTPDelimitedJSONTransportTest, InvalidTransport) {
 }
 
 TEST_F(HTTPDelimitedJSONTransportTest, Write) {
-  ASSERT_THAT_ERROR(transport->Send(Req{7, "foo", std::nullopt}), Succeeded());
-  ASSERT_THAT_ERROR(transport->Send(Resp{5, 0, "bar"}), Succeeded());
-  ASSERT_THAT_ERROR(transport->Send(Evt{"baz", std::nullopt}), Succeeded());
+  ASSERT_THAT_ERROR(transport->Send(Request{7, "foo", std::nullopt}),
+                    Succeeded());
+  ASSERT_THAT_ERROR(transport->Send(Response{5, 0, "bar"}), Succeeded());
+  ASSERT_THAT_ERROR(transport->Send(test_protocol::Event{"baz", std::nullopt}),
+                    Succeeded());
   output.CloseWriteFileDescriptor();
   char buf[1024];
   Expected<size_t> bytes_read =
@@ -530,18 +533,20 @@ TEST_F(JSONRPCTransportTest, MalformedRequests) {
 }
 
 TEST_F(JSONRPCTransportTest, Read) {
-  Write(Message{Req{1, "foo", std::nullopt}});
-  EXPECT_CALL(message_handler, Received(Req{1, "foo", std::nullopt}));
+  Write(Message{Request{1, "foo", std::nullopt}});
+  EXPECT_CALL(message_handler, Received(Request{1, "foo", std::nullopt}));
   ASSERT_THAT_ERROR(Run(), Succeeded());
 }
 
 TEST_F(JSONRPCTransportTest, ReadMultipleMessagesInSingleWrite) {
   InSequence seq;
-  Write(Message{Req{1, "one", std::nullopt}}, Message{Evt{"two", std::nullopt}},
-        Message{Resp{3, 0, "three"}});
-  EXPECT_CALL(message_handler, Received(Req{1, "one", std::nullopt}));
-  EXPECT_CALL(message_handler, Received(Evt{"two", std::nullopt}));
-  EXPECT_CALL(message_handler, Received(Resp{3, 0, "three"}));
+  Write(Message{Request{1, "one", std::nullopt}},
+        Message{test_protocol::Event{"two", std::nullopt}},
+        Message{Response{3, 0, "three"}});
+  EXPECT_CALL(message_handler, Received(Request{1, "one", std::nullopt}));
+  EXPECT_CALL(message_handler,
+              Received(test_protocol::Event{"two", std::nullopt}));
+  EXPECT_CALL(message_handler, Received(Response{3, 0, "three"}));
   ASSERT_THAT_ERROR(Run(), Succeeded());
 }
 
@@ -550,8 +555,8 @@ TEST_F(JSONRPCTransportTest, ReadAcrossMultipleChunks) {
   // across the chunk boundary.
   std::string long_str = std::string(
       IOTransport<test_protocol::ProtoDesc>::kReadBufferSize * 2, 'x');
-  Write(Req{42, long_str, std::nullopt});
-  EXPECT_CALL(message_handler, Received(Req{42, long_str, std::nullopt}));
+  Write(Request{42, long_str, std::nullopt});
+  EXPECT_CALL(message_handler, Received(Request{42, long_str, std::nullopt}));
   ASSERT_THAT_ERROR(Run(), Succeeded());
 }
 
@@ -561,7 +566,7 @@ TEST_F(JSONRPCTransportTest, ReadPartialMessage) {
   std::string part1 = message.substr(0, 7);
   std::string part2 = message.substr(7);
 
-  EXPECT_CALL(message_handler, Received(Req{42, "foo", std::nullopt}));
+  EXPECT_CALL(message_handler, Received(Request{42, "foo", std::nullopt}));
 
   ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded());
   loop.AddPendingCallback(
@@ -591,9 +596,11 @@ TEST_F(JSONRPCTransportTest, ReaderWithUnhandledData) {
 }
 
 TEST_F(JSONRPCTransportTest, Write) {
-  ASSERT_THAT_ERROR(transport->Send(Req{11, "foo", std::nullopt}), Succeeded());
-  ASSERT_THAT_ERROR(transport->Send(Resp{14, 0, "bar"}), Succeeded());
-  ASSERT_THAT_ERROR(transport->Send(Evt{"baz", std::nullopt}), Succeeded());
+  ASSERT_THAT_ERROR(transport->Send(Request{11, "foo", std::nullopt}),
+                    Succeeded());
+  ASSERT_THAT_ERROR(transport->Send(Response{14, 0, "bar"}), Succeeded());
+  ASSERT_THAT_ERROR(transport->Send(test_protocol::Event{"baz", std::nullopt}),
+                    Succeeded());
   output.CloseWriteFileDescriptor();
   char buf[1024];
   Expected<size_t> bytes_read =
@@ -624,8 +631,8 @@ TEST_F(TransportBinderTest, OutBoundRequests) {
     EXPECT_EQ(result->c, 3);
     replied = true;
   });
-  EXPECT_CALL(remote, Received(Req{1, "add", MyFnParams{1, 2}}));
-  EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, toJSON(MyFnResult{3})}),
+  EXPECT_CALL(remote, Received(Request{1, "add", MyFnParams{1, 2}}));
+  EXPECT_THAT_ERROR(from_remote->Send(Response{1, 0, toJSON(MyFnResult{3})}),
                     Succeeded());
   Run();
   EXPECT_TRUE(replied);
@@ -640,8 +647,8 @@ TEST_F(TransportBinderTest, OutBoundRequestsVoidParams) {
     EXPECT_EQ(result->c, 3);
     replied = true;
   });
-  EXPECT_CALL(remote, Received(Req{1, "voidParam", std::nullopt}));
-  EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, toJSON(MyFnResult{3})}),
+  EXPECT_CALL(remote, Received(Request{1, "voidParam", std::nullopt}));
+  EXPECT_THAT_ERROR(from_remote->Send(Response{1, 0, toJSON(MyFnResult{3})}),
                     Succeeded());
   Run();
   EXPECT_TRUE(replied);
@@ -655,8 +662,9 @@ TEST_F(TransportBinderTest, OutBoundRequestsVoidResult) {
     EXPECT_THAT_ERROR(std::move(error), Succeeded());
     replied = true;
   });
-  EXPECT_CALL(remote, Received(Req{1, "voidResult", MyFnParams{4, 5}}));
-  EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, std::nullopt}), Succeeded());
+  EXPECT_CALL(remote, Received(Request{1, "voidResult", MyFnParams{4, 5}}));
+  EXPECT_THAT_ERROR(from_remote->Send(Response{1, 0, std::nullopt}),
+                    Succeeded());
   Run();
   EXPECT_TRUE(replied);
 }
@@ -669,8 +677,9 @@ TEST_F(TransportBinderTest, OutBoundRequestsVoidParamsAndVoidResult) {
     EXPECT_THAT_ERROR(std::move(error), Succeeded());
     replied = true;
   });
-  EXPECT_CALL(remote, Received(Req{1, "voidParamAndResult", std::nullopt}));
-  EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, std::nullopt}), Succeeded());
+  EXPECT_CALL(remote, Received(Request{1, "voidParamAndResult", std::nullopt}));
+  EXPECT_THAT_ERROR(from_remote->Send(Response{1, 0, std::nullopt}),
+                    Succeeded());
   Run();
   EXPECT_TRUE(replied);
 }
@@ -686,10 +695,10 @@ TEST_F(TransportBinderTest, InBoundRequests) {
         return MyFnResult{params.a + params.b + captured_param};
       },
       2);
-  EXPECT_THAT_ERROR(from_remote->Send(Req{1, "add", MyFnParams{3, 4}}),
+  EXPECT_THAT_ERROR(from_remote->Send(Request{1, "add", MyFnParams{3, 4}}),
                     Succeeded());
 
-  EXPECT_CALL(remote, Received(Resp{1, 0, MyFnResult{9}}));
+  EXPECT_CALL(remote, Received(Response{1, 0, MyFnResult{9}}));
   Run();
   EXPECT_TRUE(called);
 }
@@ -703,9 +712,9 @@ TEST_F(TransportBinderTest, InBoundRequestsVoidParams) {
         return MyFnResult{captured_param};
       },
       2);
-  EXPECT_THAT_ERROR(from_remote->Send(Req{2, "voidParam", std::nullopt}),
+  EXPECT_THAT_ERROR(from_remote->Send(Request{2, "voidParam", std::nullopt}),
                     Succeeded());
-  EXPECT_CALL(remote, Received(Resp{2, 0, MyFnResult{2}}));
+  EXPECT_CALL(remote, Received(Response{2, 0, MyFnResult{2}}));
   Run();
   EXPECT_TRUE(called);
 }
@@ -722,9 +731,10 @@ TEST_F(TransportBinderTest, InBoundRequestsVoidResult) {
         return llvm::Error::success();
       },
       2);
-  EXPECT_THAT_ERROR(from_remote->Send(Req{3, "voidResult", MyFnParams{3, 4}}),
-                    Succeeded());
-  EXPECT_CALL(remote, Received(Resp{3, 0, std::nullopt}));
+  EXPECT_THAT_ERROR(
+      from_remote->Send(Request{3, "voidResult", MyFnParams{3, 4}}),
+      Succeeded());
+  EXPECT_CALL(remote, Received(Response{3, 0, std::nullopt}));
   Run();
   EXPECT_TRUE(called);
 }
@@ -739,9 +749,9 @@ TEST_F(TransportBinderTest, InBoundRequestsVoidParamsAndResult) {
       },
       2);
   EXPECT_THAT_ERROR(
-      from_remote->Send(Req{4, "voidParamAndResult", std::nullopt}),
+      from_remote->Send(Request{4, "voidParamAndResult", std::nullopt}),
       Succeeded());
-  EXPECT_CALL(remote, Received(Resp{4, 0, std::nullopt}));
+  EXPECT_CALL(remote, Received(Response{4, 0, std::nullopt}));
   Run();
   EXPECT_TRUE(called);
 }
@@ -750,14 +760,14 @@ TEST_F(TransportBinderTest, InBoundRequestsVoidParamsAndResult) {
 TEST_F(TransportBinderTest, OutBoundEvents) {
   OutgoingEvent<MyFnParams> emitEvent = binder->Bind<MyFnParams>("evt");
   emitEvent(MyFnParams{1, 2});
-  EXPECT_CALL(remote, Received(Evt{"evt", MyFnParams{1, 2}}));
+  EXPECT_CALL(remote, Received(test_protocol::Event{"evt", MyFnParams{1, 2}}));
   Run();
 }
 
 TEST_F(TransportBinderTest, OutBoundEventsVoidParams) {
   OutgoingEvent<void> emitEvent = binder->Bind<void>("evt");
   emitEvent();
-  EXPECT_CALL(remote, Received(Evt{"evt", std::nullopt}));
+  EXPECT_CALL(remote, Received(test_protocol::Event{"evt", std::nullopt}));
   Run();
 }
 
@@ -773,8 +783,9 @@ TEST_F(TransportBinderTest, InBoundEvents) {
         called = true;
       },
       42);
-  EXPECT_THAT_ERROR(from_remote->Send(Evt{"evt", MyFnParams{3, 4}}),
-                    Succeeded());
+  EXPECT_THAT_ERROR(
+      from_remote->Send(test_protocol::Event{"evt", MyFnParams{3, 4}}),
+      Succeeded());
   Run();
   EXPECT_TRUE(called);
 }
@@ -788,7 +799,9 @@ TEST_F(TransportBinderTest, InBoundEventsVoidParams) {
         called = true;
       },
       42);
-  EXPECT_THAT_ERROR(from_remote->Send(Evt{"evt", std::nullopt}), Succeeded());
+  EXPECT_THAT_ERROR(
+      from_remote->Send(test_protocol::Event{"evt", std::nullopt}),
+      Succeeded());
   Run();
   EXPECT_TRUE(called);
 }
diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst
index 965a21b..bf3de11 100644
--- a/llvm/docs/MLGO.rst
+++ b/llvm/docs/MLGO.rst
@@ -508,7 +508,7 @@ embeddings can be computed and accessed via an ``ir2vec::Embedder`` instance.
 
    .. code-block:: c++
 
-    const ir2vec::Embedding &FuncVector = Emb->getFunctionVector();
+    ir2vec::Embedding FuncVector = Emb->getFunctionVector();
 
    Currently, ``Embedder`` can generate embeddings at three levels: Instructions,
    Basic Blocks, and Functions. Appropriate getters are provided to access the
diff --git a/llvm/include/llvm/ADT/Bitset.h b/llvm/include/llvm/ADT/Bitset.h
index b1e539e..0dfeb20 100644
--- a/llvm/include/llvm/ADT/Bitset.h
+++ b/llvm/include/llvm/ADT/Bitset.h
@@ -38,14 +38,22 @@ class Bitset {
   static constexpr unsigned NumWords =
       (NumBits + BitwordBits - 1) / BitwordBits;
 
-protected:
   using StorageType = std::array<BitWord, NumWords>;
-
-private:
   StorageType Bits{};
 
 protected:
-  constexpr Bitset(const StorageType &B) : Bits{B} {}
+  constexpr Bitset(const std::array<uint64_t, (NumBits + 63) / 64> &B) {
+    if constexpr (sizeof(BitWord) == sizeof(uint64_t)) {
+      for (size_t I = 0; I != B.size(); ++I)
+        Bits[I] = B[I];
+    } else {
+      for (size_t I = 0; I != B.size(); ++I) {
+        uint64_t Elt = B[I];
+        Bits[2 * I] = static_cast<uint32_t>(Elt);
+        Bits[2 * I + 1] = static_cast<uint32_t>(Elt >> 32);
+      }
+    }
+  }
 
 public:
   constexpr Bitset() = default;
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 81409df..6bc51fe 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -533,21 +533,20 @@ protected:
   /// in the IR instructions to generate the vector representation.
   const float OpcWeight, TypeWeight, ArgWeight;
 
-  // Utility maps - these are used to store the vector representations of
-  // instructions, basic blocks and functions.
-  mutable Embedding FuncVector;
-  mutable BBEmbeddingsMap BBVecMap;
-  mutable InstEmbeddingsMap InstVecMap;
-
-  LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab);
+  LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab)
+      : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()),
+        OpcWeight(ir2vec::OpcWeight), TypeWeight(ir2vec::TypeWeight),
+        ArgWeight(ir2vec::ArgWeight) {}
 
-  /// Function to compute embeddings. It generates embeddings for all
-  /// the instructions and basic blocks in the function F.
-  void computeEmbeddings() const;
+  /// Function to compute embeddings.
+  Embedding computeEmbeddings() const;
 
   /// Function to compute the embedding for a given basic block.
+  Embedding computeEmbeddings(const BasicBlock &BB) const;
+
+  /// Function to compute the embedding for a given instruction.
   /// Specific to the kind of embeddings being computed.
-  virtual void computeEmbeddings(const BasicBlock &BB) const = 0;
+  virtual Embedding computeEmbeddings(const Instruction &I) const = 0;
 
 public:
   virtual ~Embedder() = default;
@@ -556,23 +555,27 @@ public:
   LLVM_ABI static std::unique_ptr<Embedder>
   create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab);
 
-  /// Returns a map containing instructions and the corresponding embeddings for
-  /// the function F if it has been computed. If not, it computes the embeddings
-  /// for the function and returns the map.
-  LLVM_ABI const InstEmbeddingsMap &getInstVecMap() const;
-
-  /// Returns a map containing basic block and the corresponding embeddings for
-  /// the function F if it has been computed. If not, it computes the embeddings
-  /// for the function and returns the map.
-  LLVM_ABI const BBEmbeddingsMap &getBBVecMap() const;
+  /// Computes and returns the embedding for a given instruction in the function
+  /// F
+  LLVM_ABI Embedding getInstVector(const Instruction &I) const {
+    return computeEmbeddings(I);
+  }
 
-  /// Returns the embedding for a given basic block in the function F if it has
-  /// been computed. If not, it computes the embedding for the basic block and
-  /// returns it.
-  LLVM_ABI const Embedding &getBBVector(const BasicBlock &BB) const;
+  /// Computes and returns the embedding for a given basic block in the function
+  /// F
+  LLVM_ABI Embedding getBBVector(const BasicBlock &BB) const {
+    return computeEmbeddings(BB);
+  }
 
   /// Computes and returns the embedding for the current function.
-  LLVM_ABI const Embedding &getFunctionVector() const;
+  LLVM_ABI Embedding getFunctionVector() const { return computeEmbeddings(); }
+
+  /// Invalidate embeddings if cached. The embeddings may not be relevant
+  /// anymore when the IR changes due to transformations. In such cases, the
+  /// cached embeddings should be invalidated to ensure
+  /// correctness/recomputation. This is a no-op for SymbolicEmbedder but
+  /// removes all the cached entries in FlowAwareEmbedder.
+  virtual void invalidateEmbeddings() { return; }
 };
 
 /// Class for computing the Symbolic embeddings of IR2Vec.
@@ -580,7 +583,7 @@ public:
 /// representations obtained from the Vocabulary.
 class LLVM_ABI SymbolicEmbedder : public Embedder {
 private:
-  void computeEmbeddings(const BasicBlock &BB) const override;
+  Embedding computeEmbeddings(const Instruction &I) const override;
 
 public:
   SymbolicEmbedder(const Function &F, const Vocabulary &Vocab)
@@ -592,11 +595,15 @@ public:
 /// embeddings, and additionally capture the flow information in the IR.
 class LLVM_ABI FlowAwareEmbedder : public Embedder {
 private:
-  void computeEmbeddings(const BasicBlock &BB) const override;
+  // FlowAware embeddings would benefit from caching instruction embeddings as
+  // they are reused while computing the embeddings of other instructions.
+  mutable InstEmbeddingsMap InstVecMap;
+  Embedding computeEmbeddings(const Instruction &I) const override;
 
 public:
   FlowAwareEmbedder(const Function &F, const Vocabulary &Vocab)
       : Embedder(F, Vocab) {}
+  void invalidateEmbeddings() override { InstVecMap.clear(); }
 };
 
 } // namespace ir2vec
diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h
index 930c6f9..4a54caa 100644
--- a/llvm/include/llvm/IR/ConstantFPRange.h
+++ b/llvm/include/llvm/IR/ConstantFPRange.h
@@ -200,6 +200,12 @@ public:
   /// with another range.  The resultant range is guaranteed to include the
   /// elements of both sets, but may contain more.
   LLVM_ABI ConstantFPRange unionWith(const ConstantFPRange &CR) const;
+
+  /// Calculate absolute value range.
+  LLVM_ABI ConstantFPRange abs() const;
+
+  /// Calculate range of negated values.
+  LLVM_ABI ConstantFPRange negate() const;
 };
 
 inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) {
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 7bc90d4..774063b 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -116,7 +116,7 @@ def SDTIntBinOp : SDTypeProfile<1, 2, [     // add, and, or, xor, udiv, etc.
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>
 ]>;
 def SDTIntShiftOp : SDTypeProfile<1, 2, [   // shl, sra, srl
-  SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>
+  SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 2>
 ]>;
 def SDTIntShiftPairOp : SDTypeProfile<2, 3, [ // shl_parts, sra_parts, srl_parts
   SDTCisInt<0>, SDTCisSameAs<1, 0>,
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 1794a60..85b5372 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -153,11 +153,6 @@ void Embedding::print(raw_ostream &OS) const {
 // Embedder and its subclasses
 //===----------------------------------------------------------------------===//
 
-Embedder::Embedder(const Function &F, const Vocabulary &Vocab)
-    : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()),
-      OpcWeight(::OpcWeight), TypeWeight(::TypeWeight), ArgWeight(::ArgWeight),
-      FuncVector(Embedding(Dimension)) {}
-
 std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F,
                                            const Vocabulary &Vocab) {
   switch (Mode) {
@@ -169,110 +164,85 @@ std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F,
   return nullptr;
 }
 
-const InstEmbeddingsMap &Embedder::getInstVecMap() const {
-  if (InstVecMap.empty())
-    computeEmbeddings();
-  return InstVecMap;
-}
-
-const BBEmbeddingsMap &Embedder::getBBVecMap() const {
-  if (BBVecMap.empty())
-    computeEmbeddings();
-  return BBVecMap;
-}
-
-const Embedding &Embedder::getBBVector(const BasicBlock &BB) const {
-  auto It = BBVecMap.find(&BB);
-  if (It != BBVecMap.end())
-    return It->second;
-  computeEmbeddings(BB);
-  return BBVecMap[&BB];
-}
+Embedding Embedder::computeEmbeddings() const {
+  Embedding FuncVector(Dimension, 0.0);
 
-const Embedding &Embedder::getFunctionVector() const {
-  // Currently, we always (re)compute the embeddings for the function.
-  // This is cheaper than caching the vector.
-  computeEmbeddings();
-  return FuncVector;
-}
-
-void Embedder::computeEmbeddings() const {
   if (F.isDeclaration())
-    return;
-
-  FuncVector = Embedding(Dimension, 0.0);
+    return FuncVector;
 
   // Consider only the basic blocks that are reachable from entry
-  for (const BasicBlock *BB : depth_first(&F)) {
-    computeEmbeddings(*BB);
-    FuncVector += BBVecMap[BB];
-  }
+  for (const BasicBlock *BB : depth_first(&F))
+    FuncVector += computeEmbeddings(*BB);
+  return FuncVector;
 }
 
-void SymbolicEmbedder::computeEmbeddings(const BasicBlock &BB) const {
+Embedding Embedder::computeEmbeddings(const BasicBlock &BB) const {
   Embedding BBVector(Dimension, 0);
 
   // We consider only the non-debug and non-pseudo instructions
-  for (const auto &I : BB.instructionsWithoutDebug()) {
-    Embedding ArgEmb(Dimension, 0);
-    for (const auto &Op : I.operands())
-      ArgEmb += Vocab[*Op];
-    auto InstVector =
-        Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
-    if (const auto *IC = dyn_cast<CmpInst>(&I))
-      InstVector += Vocab[IC->getPredicate()];
-    InstVecMap[&I] = InstVector;
-    BBVector += InstVector;
-  }
-  BBVecMap[&BB] = BBVector;
-}
-
-void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const {
-  Embedding BBVector(Dimension, 0);
+  for (const auto &I : BB.instructionsWithoutDebug())
+    BBVector += computeEmbeddings(I);
+  return BBVector;
+}
+
+Embedding SymbolicEmbedder::computeEmbeddings(const Instruction &I) const {
+  // Currently, we always (re)compute the embeddings for symbolic embedder.
+  // This is cheaper than caching the vectors.
+  Embedding ArgEmb(Dimension, 0);
+  for (const auto &Op : I.operands())
+    ArgEmb += Vocab[*Op];
+  auto InstVector =
+      Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
+  if (const auto *IC = dyn_cast<CmpInst>(&I))
+    InstVector += Vocab[IC->getPredicate()];
+  return InstVector;
+}
+
+Embedding FlowAwareEmbedder::computeEmbeddings(const Instruction &I) const {
+  // If we have already computed the embedding for this instruction, return it
+  auto It = InstVecMap.find(&I);
+  if (It != InstVecMap.end())
+    return It->second;
 
-  // We consider only the non-debug and non-pseudo instructions
-  for (const auto &I : BB.instructionsWithoutDebug()) {
-    // TODO: Handle call instructions differently.
-    // For now, we treat them like other instructions
-    Embedding ArgEmb(Dimension, 0);
-    for (const auto &Op : I.operands()) {
-      // If the operand is defined elsewhere, we use its embedding
-      if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
-        auto DefIt = InstVecMap.find(DefInst);
-        // Fixme (#159171): Ideally we should never miss an instruction
-        // embedding here.
-        // But when we have cyclic dependencies (e.g., phi
-        // nodes), we might miss the embedding. In such cases, we fall back to
-        // using the vocabulary embedding. This can be fixed by iterating to a
-        // fixed-point, or by using a simple solver for the set of simultaneous
-        // equations.
-        // Another case when we might miss an instruction embedding is when
-        // the operand instruction is in a different basic block that has not
-        // been processed yet. This can be fixed by processing the basic blocks
-        // in a topological order.
-        if (DefIt != InstVecMap.end())
-          ArgEmb += DefIt->second;
-        else
-          ArgEmb += Vocab[*Op];
-      }
-      // If the operand is not defined by an instruction, we use the vocabulary
-      else {
-        LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: "
-                          << *Op << "=" << Vocab[*Op][0] << "\n");
+  // TODO: Handle call instructions differently.
+  // For now, we treat them like other instructions
+  Embedding ArgEmb(Dimension, 0);
+  for (const auto &Op : I.operands()) {
+    // If the operand is defined elsewhere, we use its embedding
+    if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
+      auto DefIt = InstVecMap.find(DefInst);
+      // Fixme (#159171): Ideally we should never miss an instruction
+      // embedding here.
+      // But when we have cyclic dependencies (e.g., phi
+      // nodes), we might miss the embedding. In such cases, we fall back to
+      // using the vocabulary embedding. This can be fixed by iterating to a
+      // fixed-point, or by using a simple solver for the set of simultaneous
+      // equations.
+      // Another case when we might miss an instruction embedding is when
+      // the operand instruction is in a different basic block that has not
+      // been processed yet. This can be fixed by processing the basic blocks
+      // in a topological order.
+      if (DefIt != InstVecMap.end())
+        ArgEmb += DefIt->second;
+      else
         ArgEmb += Vocab[*Op];
-      }
     }
-    // Create the instruction vector by combining opcode, type, and arguments
-    // embeddings
-    auto InstVector =
-        Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
-    // Add compare predicate embedding as an additional operand if applicable
-    if (const auto *IC = dyn_cast<CmpInst>(&I))
-      InstVector += Vocab[IC->getPredicate()];
-    InstVecMap[&I] = InstVector;
-    BBVector += InstVector;
+    // If the operand is not defined by an instruction, we use the
+    // vocabulary
+    else {
+      LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: "
+                        << *Op << "=" << Vocab[*Op][0] << "\n");
+      ArgEmb += Vocab[*Op];
+    }
   }
-  BBVecMap[&BB] = BBVector;
+  // Create the instruction vector by combining opcode, type, and arguments
+  // embeddings
+  auto InstVector =
+      Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
+  if (const auto *IC = dyn_cast<CmpInst>(&I))
+    InstVector += Vocab[IC->getPredicate()];
+  InstVecMap[&I] = InstVector;
+  return InstVector;
 }
 
 // ==----------------------------------------------------------------------===//
@@ -695,25 +665,17 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M,
     Emb->getFunctionVector().print(OS);
 
     OS << "Basic block vectors:\n";
-    const auto &BBMap = Emb->getBBVecMap();
     for (const BasicBlock &BB : F) {
-      auto It = BBMap.find(&BB);
-      if (It != BBMap.end()) {
-        OS << "Basic block: " << BB.getName() << ":\n";
-        It->second.print(OS);
-      }
+      OS << "Basic block: " << BB.getName() << ":\n";
+      Emb->getBBVector(BB).print(OS);
     }
 
     OS << "Instruction vectors:\n";
-    const auto &InstMap = Emb->getInstVecMap();
     for (const BasicBlock &BB : F) {
       for (const Instruction &I : BB) {
-        auto It = InstMap.find(&I);
-        if (It != InstMap.end()) {
-          OS << "Instruction: ";
-          I.print(OS);
-          It->second.print(OS);
-        }
+        OS << "Instruction: ";
+        I.print(OS);
+        Emb->getInstVector(I).print(OS);
       }
     }
   }
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ebfea8e..e17a214 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -2051,6 +2051,12 @@ bool RegisterCoalescer::joinCopy(
   }
 
   if (CP.getNewRC()) {
+    if (RegClassInfo.getNumAllocatableRegs(CP.getNewRC()) == 0) {
+      LLVM_DEBUG(dbgs() << "\tNo " << TRI->getRegClassName(CP.getNewRC())
+                        << "are available for allocation\n");
+      return false;
+    }
+
     auto SrcRC = MRI->getRegClass(CP.getSrcReg());
     auto DstRC = MRI->getRegClass(CP.getDstReg());
     unsigned SrcIdx = CP.getSrcIdx();
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
index 7509188..fba6942 100644
--- a/llvm/lib/IR/ConstantFPRange.cpp
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -391,3 +391,23 @@ ConstantFPRange ConstantFPRange::unionWith(const ConstantFPRange &CR) const {
   return ConstantFPRange(minnum(Lower, CR.Lower), maxnum(Upper, CR.Upper),
                          MayBeQNaN | CR.MayBeQNaN, MayBeSNaN | CR.MayBeSNaN);
 }
+
+ConstantFPRange ConstantFPRange::abs() const {
+  if (isNaNOnly())
+    return *this;
+  // Check if the range is all non-negative or all non-positive.
+  if (Lower.isNegative() == Upper.isNegative()) {
+    if (Lower.isNegative())
+      return negate();
+    return *this;
+  }
+  // The range contains both positive and negative values.
+  APFloat NewLower = APFloat::getZero(getSemantics());
+  APFloat NewUpper = maxnum(-Lower, Upper);
+  return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN,
+                         MayBeSNaN);
+}
+
+ConstantFPRange ConstantFPRange::negate() const {
+  return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 557d87f..56807a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5053,16 +5053,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       //
       // vdst, srcA, srcB, srcC
       const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+      bool UseAGPRForm = !Subtarget.hasGFX90AInsts() ||
+                         Info->selectAGPRFormMFMA(MinNumRegsRequired);
+
       OpdsMapping[0] =
-          Info->getMinNumAGPRs() >= MinNumRegsRequired
-              ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
-              : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+          UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
+                      : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
       OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
       OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
       OpdsMapping[4] =
-          Info->getMinNumAGPRs() >= MinNumRegsRequired
-              ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
-              : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+          UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
+                      : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
       break;
     }
     case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
@@ -5115,11 +5117,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
     case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
     case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
+      Register DstReg = MI.getOperand(0).getReg();
+      unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+      unsigned MinNumRegsRequired = DstSize / 32;
+      const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+      bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired);
+
       // vdst, srcA, srcB, srcC, idx
-      OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+      OpdsMapping[0] = UseAGPRForm ? getAGPROpMapping(DstReg, MRI, *TRI)
+                                   : getVGPROpMapping(DstReg, MRI, *TRI);
+
       OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
       OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
-      OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+      OpdsMapping[4] =
+          UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
+                      : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
       OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
       break;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index b7dbb59..2c1a13c 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1202,6 +1202,12 @@ public:
 
   unsigned getMinNumAGPRs() const { return MinNumAGPRs; }
 
+  /// Return true if an MFMA that requires at least \p NumRegs should select to
+  /// the AGPR form, instead of the VGPR form.
+  bool selectAGPRFormMFMA(unsigned NumRegs) const {
+    return !MFMAVGPRForm && getMinNumAGPRs() >= NumRegs;
+  }
+
   // \returns true if a function has a use of AGPRs via inline asm or
   // has a call which may use it.
   bool mayUseAGPRs(const Function &F) const;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 7cfd059..6500fce 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -964,14 +964,12 @@ class MAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false> : Pa
 class CanUseAGPR_MAI<ValueType vt> {
   code PredicateCode = [{
     return !Subtarget->hasGFX90AInsts() ||
-      (!SIMachineFunctionInfo::MFMAVGPRForm &&
-       MF->getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
-  }] # !srl(vt.Size, 5) # ");";
+      MF->getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA(
+        }] # !srl(vt.Size, 5) # ");";
 
   code GISelPredicateCode = [{
     return !Subtarget->hasGFX90AInsts() ||
-      (!SIMachineFunctionInfo::MFMAVGPRForm &&
-       MF.getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
+      MF.getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA(
   }] # !srl(vt.Size, 5) # ");";
 }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index a0acfcf..85ce944 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -699,35 +699,20 @@ def: OpR_RR_pat<C2_cmpgtp,    setgt,          i1,   I64>;
 def: OpR_RR_pat<C2_cmpgtup,   setugt,         i1,   I64>;
 def: OpR_RR_pat<C2_cmpgtp,    RevCmp<setlt>,  i1,   I64>;
 def: OpR_RR_pat<C2_cmpgtup,   RevCmp<setult>, i1,   I64>;
-def: OpR_RR_pat<A2_vcmpbeq,   seteq,          i1,   V8I8>;
 def: OpR_RR_pat<A2_vcmpbeq,   seteq,          v8i1, V8I8>;
-def: OpR_RR_pat<A4_vcmpbgt,   RevCmp<setlt>,  i1,   V8I8>;
 def: OpR_RR_pat<A4_vcmpbgt,   RevCmp<setlt>,  v8i1, V8I8>;
-def: OpR_RR_pat<A4_vcmpbgt,   setgt,          i1,   V8I8>;
 def: OpR_RR_pat<A4_vcmpbgt,   setgt,          v8i1, V8I8>;
-def: OpR_RR_pat<A2_vcmpbgtu,  RevCmp<setult>, i1,   V8I8>;
 def: OpR_RR_pat<A2_vcmpbgtu,  RevCmp<setult>, v8i1, V8I8>;
-def: OpR_RR_pat<A2_vcmpbgtu,  setugt,         i1,   V8I8>;
 def: OpR_RR_pat<A2_vcmpbgtu,  setugt,         v8i1, V8I8>;
-def: OpR_RR_pat<A2_vcmpheq,   seteq,          i1,   V4I16>;
 def: OpR_RR_pat<A2_vcmpheq,   seteq,          v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgt,   RevCmp<setlt>,  i1,   V4I16>;
 def: OpR_RR_pat<A2_vcmphgt,   RevCmp<setlt>,  v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgt,   setgt,          i1,   V4I16>;
 def: OpR_RR_pat<A2_vcmphgt,   setgt,          v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgtu,  RevCmp<setult>, i1,   V4I16>;
 def: OpR_RR_pat<A2_vcmphgtu,  RevCmp<setult>, v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgtu,  setugt,         i1,   V4I16>;
 def: OpR_RR_pat<A2_vcmphgtu,  setugt,         v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmpweq,   seteq,          i1,   V2I32>;
 def: OpR_RR_pat<A2_vcmpweq,   seteq,          v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgt,   RevCmp<setlt>,  i1,   V2I32>;
 def: OpR_RR_pat<A2_vcmpwgt,   RevCmp<setlt>,  v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgt,   setgt,          i1,   V2I32>;
 def: OpR_RR_pat<A2_vcmpwgt,   setgt,          v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgtu,  RevCmp<setult>, i1,   V2I32>;
 def: OpR_RR_pat<A2_vcmpwgtu,  RevCmp<setult>, v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         i1,   V2I32>;
 def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         v2i1, V2I32>;
 
 def: OpR_RR_pat<F2_sfcmpeq,   seteq,          i1, F32>;
@@ -1213,12 +1198,6 @@ def: OpR_RI_pat<S2_asl_i_r,  Shl, i32,   I32,   u5_0ImmPred>;
 def: OpR_RI_pat<S2_asr_i_p,  Sra, i64,   I64,   u6_0ImmPred>;
 def: OpR_RI_pat<S2_lsr_i_p,  Srl, i64,   I64,   u6_0ImmPred>;
 def: OpR_RI_pat<S2_asl_i_p,  Shl, i64,   I64,   u6_0ImmPred>;
-def: OpR_RI_pat<S2_asr_i_vh, Sra, v4i16, V4I16, u4_0ImmPred>;
-def: OpR_RI_pat<S2_lsr_i_vh, Srl, v4i16, V4I16, u4_0ImmPred>;
-def: OpR_RI_pat<S2_asl_i_vh, Shl, v4i16, V4I16, u4_0ImmPred>;
-def: OpR_RI_pat<S2_asr_i_vh, Sra, v2i32, V2I32, u5_0ImmPred>;
-def: OpR_RI_pat<S2_lsr_i_vh, Srl, v2i32, V2I32, u5_0ImmPred>;
-def: OpR_RI_pat<S2_asl_i_vh, Shl, v2i32, V2I32, u5_0ImmPred>;
 
 def: OpR_RR_pat<S2_asr_r_r, Sra, i32, I32, I32>;
 def: OpR_RR_pat<S2_lsr_r_r, Srl, i32, I32, I32>;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 40c05e8..333b693 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1823,6 +1823,11 @@ def TuneConditionalCompressedMoveFusion
 def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
 def NoConditionalMoveFusion  : Predicate<"!Subtarget->hasConditionalMoveFusion()">;
 
+def TuneHasSingleElementVecFP64
+    : SubtargetFeature<"single-element-vec-fp64", "HasSingleElementVectorFP64", "true",
+                       "Certain vector FP64 operations produce a single result "
+                       "element per cycle">;
+
 def TuneMIPSP8700
     : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700",
                        "MIPS p8700 processor">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 6d86aff..3658817 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -14,6 +14,10 @@
 // otherwise.
 def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
 
+// This scheduling predicate is true when subtarget feature TuneHasSingleElementVecFP64
+// is enabled.
+def SingleElementVecFP64SchedPred : FeatureSchedPredicate<TuneHasSingleElementVecFP64>;
+
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
 def isSEXT_W
     : TIIPredicate<"isSEXT_W",
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 17a7948..e86431f 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -338,7 +338,8 @@ def SIFIVE_X390 : RISCVProcessorModel<"sifive-x390",
                                        FeatureStdExtZvl1024b,
                                        FeatureVendorXSiFivecdiscarddlone,
                                        FeatureVendorXSiFivecflushdlone],
-                                      SiFiveIntelligenceTuneFeatures>;
+                                      !listconcat(SiFiveIntelligenceTuneFeatures,
+                                                  [TuneHasSingleElementVecFP64])>;
 
 defvar SiFiveP400TuneFeatures = [TuneNoDefaultUnroll,
                                  TuneConditionalCompressedMoveFusion,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 3e07eff..f863392a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN,
     ProcResourceKind VL, ProcResourceKind VS,
     ProcResourceKind VCQ,
     SiFive7FPLatencies fpLatencies,
-    bit isFP64Throttled = false,
     bit hasFastGather = false> {
 
   // Branching
@@ -832,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN,
   // 13. Vector Floating-Point Instructions
   foreach mx = SchedMxListF in {
     foreach sew = SchedSEWSet<mx, isF=1>.val in {
-      defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesDefault<mx>.c);
-      defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8);
-      defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2);
       defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
-      let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
-        defm : LMULSEWWriteResMXSEW<"WriteVFALUV",  [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFALUF",  [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulV",  [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulF",  [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFRecpV",   [VCQ, VA1], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
-      }
-      defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4);
-      let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
-        defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV",   [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF",   [VCQ, VA], mx, sew, IsWorstCase>;
-        // min max require merge
-        defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
+      if !eq(sew, 64) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF",
+                                  "WriteVFMulAddV", "WriteVFMulAddF"] in
+        defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                           mx, sew, IsWorstCase>;
+        foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in
+        defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                           mx, sew, IsWorstCase>;
+        foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in
+        defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                           mx, sew, IsWorstCase>;
+        foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in
+        defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                           mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
+          defm : LMULSEWWriteResMXSEW<"WriteVFALUV",  [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFALUF",  [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulV",  [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulF",  [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFRecpV",   [VCQ, VA1], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+        }
+        let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
+          defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV",   [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF",   [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          // min max require merge
+          defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
+        }
       }
     }
   }
@@ -892,19 +918,28 @@ multiclass SiFive7WriteResBase<int VLEN,
   // Widening
   foreach mx = SchedMxListW in {
     foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
-      defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesDefault<mx>.c);
       defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
-      defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
+      if !eq(sew, 32) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+                                           mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8,
+            AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
+        defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      }
     }
   }
   foreach mx = SchedMxListFW in {
     foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
-      defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+      defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
       defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
         defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
         defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
         defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
@@ -912,11 +947,19 @@ multiclass SiFive7WriteResBase<int VLEN,
         defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
         defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
       }
-      defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesDefault<mx>.c);
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in
-      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      if !eq(sew, 32) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+                                           mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8,
+            AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
+        defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      }
     }
     defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
@@ -933,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN,
   }
   foreach mx = SchedMxListFW in {
     foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
-      defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesNarrowing<mx>.c);
       defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
-        defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c;
+      if !eq(sew, 32) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
+        defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+                                           mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8,
+            AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
+          defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+        }
       }
     }
   }
@@ -1499,7 +1552,6 @@ multiclass SiFive7ReadAdvance {
 /// eventually be supplied by different SchedMachineModels.
 multiclass SiFive7SchedResources<int vlen, bit extraVALU,
                                  SiFive7FPLatencies fpLatencies,
-                                 bit isFP64Throttled,
                                  bit hasFastGather> {
   defm SiFive7 : SiFive7ProcResources<extraVALU>;
 
@@ -1527,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
       : SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
                             SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
                             SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
-                            SiFive7VCQ, fpLatencies, isFP64Throttled,
-                            hasFastGather>;
+                            SiFive7VCQ, fpLatencies, hasFastGather>;
 
   //===----------------------------------------------------------------------===//
   // Bypass and advance
@@ -1560,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
   bit HasExtraVALU = false;
 
   SiFive7FPLatencies FPLatencies;
-  bit IsFP64Throttled = false;
   bit HasFastGather = false;
 
   string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
@@ -1587,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
 def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
   let HasExtraVALU = true;
   let FPLatencies = SiFive7LowFPLatencies;
-  let IsFP64Throttled = true;
   let HasFastGather = true;
 }
 
@@ -1596,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
   let SchedModel = model in
   defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
                                           model.FPLatencies,
-                                          model.IsFP64Throttled,
                                           model.HasFastGather>;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 01a4308..d11b446 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -128,6 +128,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
                                     IsWorstCase>;
 }
 
+multiclass LMULSEWWriteResMXSEWVariant<string name, SchedPredicateBase Pred,
+                                       list<ProcResourceKind> predResources,
+                                       int predLat, list<int> predAcquireCycles,
+                                       list<int> predReleaseCycles,
+                                       list<ProcResourceKind> noPredResources,
+                                       int noPredLat, list<int> noPredAcquireCycles,
+                                       list<int> noPredReleaseCycles,
+                                       string mx, int sew, bit IsWorstCase> {
+  defm "" : LMULWriteResVariantImpl<name, name # "_" # mx # "_E" # sew, Pred, predResources,
+                                    predLat, predAcquireCycles,
+                                    predReleaseCycles, noPredResources,
+                                    noPredLat, noPredAcquireCycles,
+                                    noPredReleaseCycles,
+                                    IsWorstCase>;
+}
+
 // Define multiclasses to define SchedWrite, SchedRead,  WriteRes, and
 // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
 // SchedMxList variants above. Each multiclass is responsible for defining
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9b272c4..3ddf182 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -28,6 +28,10 @@ using namespace PatternMatch;
 
 #define DEBUG_TYPE "instcombine"
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
 /// This is the complement of getICmpCode, which turns an opcode and two
 /// operands into either a constant true or false, or a brand new ICmp
 /// instruction. The sign is passed in to determine which kind of predicate to
@@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) {
 static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                           bool IsAnd, bool IsLogical,
                                           InstCombiner::BuilderTy &Builder,
-                                          const SimplifyQuery &Q) {
+                                          const SimplifyQuery &Q,
+                                          Instruction &I) {
   // Match an equality compare with a non-poison constant as Cmp0.
   // Also, give up if the compare can be constant-folded to avoid looping.
   CmpPredicate Pred0;
@@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
       return nullptr;
     SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
   }
-  if (IsLogical)
-    return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp)
-                 : Builder.CreateLogicalOr(Cmp0, SubstituteCmp);
+  if (IsLogical) {
+    Instruction *MDFrom =
+        ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I;
+    return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom)
+                 : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom);
+  }
   return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
                              SubstituteCmp);
 }
@@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                                   /*IsLogical*/ false, Builder))
     return V;
 
-  if (Value *V =
-          foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q))
+  if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical,
+                                             Builder, Q, I))
     return V;
   // We can convert this case to bitwise and, because both operands are used
   // on the LHS, and as such poison from both will propagate.
-  if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd,
-                                             /*IsLogical=*/false, Builder, Q)) {
+  if (Value *V = foldAndOrOfICmpsWithConstEq(
+          RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) {
     // If RHS is still used, we should drop samesign flag.
     if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) {
       RHS->setSameSign(false);
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 3f7003d..f4e05a2 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -389,6 +389,22 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
   return OS;
 }
 
+/// Helper to get the successor corresponding to a particular case value for
+/// a switch statement.
+static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
+                                        const APInt &NextState) {
+  BasicBlock *NextCase = nullptr;
+  for (auto Case : Switch->cases()) {
+    if (Case.getCaseValue()->getValue() == NextState) {
+      NextCase = Case.getCaseSuccessor();
+      break;
+    }
+  }
+  if (!NextCase)
+    NextCase = Switch->getDefaultDest();
+  return NextCase;
+}
+
 namespace {
 /// ThreadingPath is a path in the control flow of a loop that can be threaded
 /// by cloning necessary basic blocks and replacing conditional branches with
@@ -401,6 +417,10 @@ struct ThreadingPath {
     ExitVal = V->getValue();
     IsExitValSet = true;
   }
+  void setExitValue(const APInt &V) {
+    ExitVal = V;
+    IsExitValSet = true;
+  }
   bool isExitValueSet() const { return IsExitValSet; }
 
   /// Determinator is the basic block that determines the next state of the DFA.
@@ -583,44 +603,8 @@ struct AllSwitchPaths {
   BasicBlock *getSwitchBlock() { return SwitchBlock; }
 
   void run() {
-    StateDefMap StateDef = getStateDefMap();
-    if (StateDef.empty()) {
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
-                                        Switch)
-               << "Switch instruction is not predictable.";
-      });
-      return;
-    }
-
-    auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
-    auto *SwitchPhiDefBB = SwitchPhi->getParent();
-    VisitedBlocks VB;
-    // Get paths from the determinator BBs to SwitchPhiDefBB
-    std::vector<ThreadingPath> PathsToPhiDef =
-        getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
-    if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
-      TPaths = std::move(PathsToPhiDef);
-      return;
-    }
-
-    assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
-    auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
-    // Find and append paths from SwitchPhiDefBB to SwitchBlock.
-    PathsType PathsToSwitchBB =
-        paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
-    if (PathsToSwitchBB.empty())
-      return;
-
-    std::vector<ThreadingPath> TempList;
-    for (const ThreadingPath &Path : PathsToPhiDef) {
-      for (const PathType &PathToSw : PathsToSwitchBB) {
-        ThreadingPath PathCopy(Path);
-        PathCopy.appendExcludingFirst(PathToSw);
-        TempList.push_back(PathCopy);
-      }
-    }
-    TPaths = std::move(TempList);
+    findTPaths();
+    unifyTPaths();
   }
 
 private:
@@ -812,6 +796,69 @@ private:
     return Res;
   }
 
+  // Find all threadable paths.
+  void findTPaths() {
+    StateDefMap StateDef = getStateDefMap();
+    if (StateDef.empty()) {
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
+                                        Switch)
+               << "Switch instruction is not predictable.";
+      });
+      return;
+    }
+
+    auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
+    auto *SwitchPhiDefBB = SwitchPhi->getParent();
+    VisitedBlocks VB;
+    // Get paths from the determinator BBs to SwitchPhiDefBB
+    std::vector<ThreadingPath> PathsToPhiDef =
+        getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
+    if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
+      TPaths = std::move(PathsToPhiDef);
+      return;
+    }
+
+    assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
+    auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
+    // Find and append paths from SwitchPhiDefBB to SwitchBlock.
+    PathsType PathsToSwitchBB =
+        paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
+    if (PathsToSwitchBB.empty())
+      return;
+
+    std::vector<ThreadingPath> TempList;
+    for (const ThreadingPath &Path : PathsToPhiDef) {
+      for (const PathType &PathToSw : PathsToSwitchBB) {
+        ThreadingPath PathCopy(Path);
+        PathCopy.appendExcludingFirst(PathToSw);
+        TempList.push_back(PathCopy);
+      }
+    }
+    TPaths = std::move(TempList);
+  }
+
+  // Two states are equivalent if they have the same switch destination.
+  // Unify the states in different threading path if the states are equivalent.
+  void unifyTPaths() {
+    llvm::SmallDenseMap<BasicBlock *, APInt> DestToState;
+    for (ThreadingPath &Path : TPaths) {
+      APInt NextState = Path.getExitValue();
+      BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState);
+      auto StateIt = DestToState.find(Dest);
+      if (StateIt == DestToState.end()) {
+        DestToState.insert({Dest, NextState});
+        continue;
+      }
+
+      if (NextState != StateIt->second) {
+        LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to "
+                          << StateIt->second << "\n");
+        Path.setExitValue(StateIt->second);
+      }
+    }
+  }
+
   unsigned NumVisited = 0;
   SwitchInst *Switch;
   BasicBlock *SwitchBlock;
@@ -1335,21 +1382,6 @@ private:
     return It != ClonedBBs.end() ? (*It).BB : nullptr;
   }
 
-  /// Helper to get the successor corresponding to a particular case value for
-  /// a switch statement.
-  BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
-    BasicBlock *NextCase = nullptr;
-    for (auto Case : Switch->cases()) {
-      if (Case.getCaseValue()->getValue() == NextState) {
-        NextCase = Case.getCaseSuccessor();
-        break;
-      }
-    }
-    if (!NextCase)
-      NextCase = Switch->getDefaultDest();
-    return NextCase;
-  }
-
   /// Returns true if IncomingBB is a predecessor of BB.
   bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
     return llvm::is_contained(predecessors(BB), IncomingBB);
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 155fcc5..9ac3be1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
     unsigned PreviousEdges = OtherCases->size();
     if (OtherDest == SI->getDefaultDest())
       ++PreviousEdges;
-    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+    unsigned E = PreviousEdges - 1;
+    // Remove all incoming values from OtherDest if OtherDest is unreachable.
+    if (NewBI->isUnconditional())
+      ++E;
+    for (unsigned I = 0; I != E; ++I)
       cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
   }
 
diff --git a/llvm/test/Analysis/IR2Vec/unreachable.ll b/llvm/test/Analysis/IR2Vec/unreachable.ll
index 9be0ee1..627e2c9 100644
--- a/llvm/test/Analysis/IR2Vec/unreachable.ll
+++ b/llvm/test/Analysis/IR2Vec/unreachable.ll
@@ -30,13 +30,17 @@ return:                                           ; preds = %if.else, %if.then
   %4 = load i32, ptr %retval, align 4
   ret i32 %4
 }
-
-; CHECK: Basic block vectors:
+; We'll get individual basic block embeddings for all blocks in the function.
+; But unreachable blocks are not counted for computing the function embedding.
+; CHECK: Function vector:  [ 1301.20  1318.20  1335.20 ]
+; CHECK-NEXT: Basic block vectors:
 ; CHECK-NEXT: Basic block: entry:
 ; CHECK-NEXT: [ 816.20  825.20  834.20 ]
 ; CHECK-NEXT: Basic block: if.then:
 ; CHECK-NEXT: [ 195.00  198.00  201.00 ]
 ; CHECK-NEXT: Basic block: if.else:
 ; CHECK-NEXT: [ 195.00  198.00  201.00 ]
+; CHECK-NEXT: Basic block: unreachable:
+; CHECK-NEXT:  [ 101.00  103.00  105.00 ]
 ; CHECK-NEXT: Basic block: return:
 ; CHECK-NEXT: [ 95.00  97.00  99.00 ]
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll
new file mode 100644
index 0000000..f466513
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Make sure the coalescer doesn't introduce any uses of
+; vreg_1024. None are available to allocate with the register budget
+; of this function.
+
+define void @no_introduce_vreg_1024() #0 {
+; CHECK-LABEL: no_introduce_vreg_1024:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def v[0:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v9, v0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[0:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %tuple = call <8 x i32> asm sideeffect "; def $0","=v"()
+  %sub0 = extractelement <8 x i32> %tuple, i32 0
+  %insert = insertelement <16 x i32> poison, i32 %sub0, i32 9
+  call void asm sideeffect "; use $0","v"(<16 x i32> %insert)
+  ret void
+}
+
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir
new file mode 100644
index 0000000..1f414eb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s
+
+# The register budget for this function does not permit using 1024-bit
+# registers. The coalescer should not introduce a 1024-bit virtual
+# register which will fail to allocate.
+
+--- |
+  define void @no_introduce_vreg_1024() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+...
+---
+name:            no_introduce_vreg_1024
+tracksRegLiveness: true
+machineFunctionInfo:
+  occupancy:       10
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; CHECK-LABEL: name: no_introduce_vreg_1024
+    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub9:vreg_512 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: SI_RETURN implicit [[COPY1]]
+    %0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    undef %1.sub9:vreg_512 = COPY %0.sub0
+    SI_RETURN implicit %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
index ee11b92..0c1448a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
@@ -44,23 +44,23 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) %
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    global_load_dwordx4 v[14:17], v0, s[6:7]
+; GISEL-NEXT:    global_load_dwordx4 v[8:11], v0, s[6:7]
 ; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
 ; GISEL-NEXT:    s_load_dword s16, s[4:5], 0x64
-; GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[2:3]
+; GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[0:1]
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
 ; GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
 ; GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
 ; GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT:    v_mov_b32_e32 v12, s16
+; GISEL-NEXT:    v_mov_b32_e32 v16, s16
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_smfmac_f32_16x16x64_f16 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2
+; GISEL-NEXT:    v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 6
-; GISEL-NEXT:    global_store_dwordx4 v0, v[14:17], s[6:7]
+; GISEL-NEXT:    global_store_dwordx4 v0, v[8:11], s[6:7]
 ; GISEL-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -834,24 +834,24 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) %
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    global_load_dwordx4 v[14:17], v0, s[0:1]
+; GISEL-NEXT:    global_load_dwordx4 v[8:11], v0, s[0:1]
 ; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GISEL-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x54
 ; GISEL-NEXT:    s_load_dword s2, s[4:5], 0x64
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[8:9]
 ; GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
 ; GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
 ; GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[16:17]
 ; GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[18:19]
-; GISEL-NEXT:    v_mov_b32_e32 v12, s2
+; GISEL-NEXT:    v_mov_b32_e32 v16, s2
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_smfmac_i32_16x16x128_i8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2
+; GISEL-NEXT:    v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 6
-; GISEL-NEXT:    global_store_dwordx4 v0, v[14:17], s[0:1]
+; GISEL-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
 ; GISEL-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1349,24 +1349,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    global_load_dwordx4 v[14:17], v0, s[0:1]
+; GISEL-NEXT:    global_load_dwordx4 v[8:11], v0, s[0:1]
 ; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GISEL-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x54
 ; GISEL-NEXT:    s_load_dword s2, s[4:5], 0x64
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[8:9]
 ; GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
 ; GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
 ; GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[16:17]
 ; GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[18:19]
-; GISEL-NEXT:    v_mov_b32_e32 v12, s2
+; GISEL-NEXT:    v_mov_b32_e32 v16, s2
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_smfmac_f32_16x16x128_bf8_bf8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2
+; GISEL-NEXT:    v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 6
-; GISEL-NEXT:    global_store_dwordx4 v0, v[14:17], s[0:1]
+; GISEL-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
 ; GISEL-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1513,24 +1513,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    global_load_dwordx4 v[14:17], v0, s[0:1]
+; GISEL-NEXT:    global_load_dwordx4 v[8:11], v0, s[0:1]
 ; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GISEL-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x54
 ; GISEL-NEXT:    s_load_dword s2, s[4:5], 0x64
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[8:9]
 ; GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
 ; GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
 ; GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[16:17]
 ; GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[18:19]
-; GISEL-NEXT:    v_mov_b32_e32 v12, s2
+; GISEL-NEXT:    v_mov_b32_e32 v16, s2
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_smfmac_f32_16x16x128_bf8_fp8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2
+; GISEL-NEXT:    v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 6
-; GISEL-NEXT:    global_store_dwordx4 v0, v[14:17], s[0:1]
+; GISEL-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
 ; GISEL-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1677,24 +1677,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    global_load_dwordx4 v[14:17], v0, s[0:1]
+; GISEL-NEXT:    global_load_dwordx4 v[8:11], v0, s[0:1]
 ; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GISEL-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x54
 ; GISEL-NEXT:    s_load_dword s2, s[4:5], 0x64
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[8:9]
 ; GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
 ; GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
 ; GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[16:17]
 ; GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[18:19]
-; GISEL-NEXT:    v_mov_b32_e32 v12, s2
+; GISEL-NEXT:    v_mov_b32_e32 v16, s2
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_smfmac_f32_16x16x128_fp8_bf8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2
+; GISEL-NEXT:    v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 6
-; GISEL-NEXT:    global_store_dwordx4 v0, v[14:17], s[0:1]
+; GISEL-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
 ; GISEL-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1841,24 +1841,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    global_load_dwordx4 v[14:17], v0, s[0:1]
+; GISEL-NEXT:    global_load_dwordx4 v[8:11], v0, s[0:1]
 ; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GISEL-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x54
 ; GISEL-NEXT:    s_load_dword s2, s[4:5], 0x64
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[10:11]
+; GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[8:9]
 ; GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
 ; GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
 ; GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[16:17]
 ; GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[18:19]
-; GISEL-NEXT:    v_mov_b32_e32 v12, s2
+; GISEL-NEXT:    v_mov_b32_e32 v16, s2
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_smfmac_f32_16x16x128_fp8_fp8 v[14:17], v[8:11], v[0:7], v12 cbsz:1 abid:2
+; GISEL-NEXT:    v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 6
-; GISEL-NEXT:    global_store_dwordx4 v0, v[14:17], s[0:1]
+; GISEL-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
 ; GISEL-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 1a7a72d..693a40d 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -142,6 +142,7 @@
 ; CHECK-NEXT:   shvstvecd                        - 'Shvstvecd' (vstvec supports Direct mode).
 ; CHECK-NEXT:   shxadd-load-fusion               - Enable SH(1|2|3)ADD(.UW) + load macrofusion.
 ; CHECK-NEXT:   sifive7                          - SiFive 7-Series processors.
+; CHECK-NEXT:   single-element-vec-fp64          - Certain vector FP64 operations produce a single result element per cycle.
 ; CHECK-NEXT:   smaia                            - 'Smaia' (Advanced Interrupt Architecture Machine Level).
 ; CHECK-NEXT:   smcdeleg                         - 'Smcdeleg' (Counter Delegation Machine Level).
 ; CHECK-NEXT:   smcntrpmf                        - 'Smcntrpmf' (Cycle and Instret Privilege Mode Filtering).
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
index 663f459..de38752 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
@@ -227,10 +227,6 @@ define i32 @test3(i32 %num) {
 ; CHECK-NEXT:      i32 1, label [[CASE1:%.*]]
 ; CHECK-NEXT:      i32 2, label [[CASE2:%.*]]
 ; CHECK-NEXT:    ]
-; CHECK:       for.body.jt4:
-; CHECK-NEXT:    [[COUNT_JT4:%.*]] = phi i32 [ [[INC_JT4:%.*]], [[FOR_INC_JT4:%.*]] ]
-; CHECK-NEXT:    [[STATE_JT4:%.*]] = phi i32 [ [[STATE_NEXT_JT4:%.*]], [[FOR_INC_JT4]] ]
-; CHECK-NEXT:    br label [[FOR_INC_JT1]]
 ; CHECK:       for.body.jt3:
 ; CHECK-NEXT:    [[COUNT_JT3:%.*]] = phi i32 [ [[INC_JT3:%.*]], [[FOR_INC_JT3:%.*]] ]
 ; CHECK-NEXT:    [[STATE_JT3:%.*]] = phi i32 [ [[STATE_NEXT_JT3:%.*]], [[FOR_INC_JT3]] ]
@@ -261,17 +257,14 @@ define i32 @test3(i32 %num) {
 ; CHECK:       sel.2.si.unfold.false:
 ; CHECK-NEXT:    [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 4, [[SEL_2_SI_UNFOLD_TRUE_JT3]] ]
 ; CHECK-NEXT:    br label [[SEL_3_SI_UNFOLD_FALSE]]
-; CHECK:       sel.2.si.unfold.false.jt4:
+; CHECK:       sel.2.si.unfold.false.jt3:
 ; CHECK-NEXT:    [[DOTSI_UNFOLD_PHI1_JT4:%.*]] = phi i32 [ 4, [[SEL_2_SI_UNFOLD_TRUE:%.*]] ]
-; CHECK-NEXT:    br label [[SEL_3_SI_UNFOLD_FALSE_JT4:%.*]]
+; CHECK-NEXT:    br label [[SEL_3_SI_UNFOLD_FALSE_JT3]]
 ; CHECK:       sel.3.si.unfold.false:
 ; CHECK-NEXT:    [[SEL_2_SI_UNFOLD_PHI:%.*]] = phi i32 [ poison, [[SEL_2_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SEL_2_SI_UNFOLD_FALSE]] ]
 ; CHECK-NEXT:    br label [[FOR_INC]]
-; CHECK:       sel.3.si.unfold.false.jt4:
-; CHECK-NEXT:    [[SEL_2_SI_UNFOLD_PHI_JT4:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI1_JT4]], [[SEL_2_SI_UNFOLD_FALSE_JT4]] ]
-; CHECK-NEXT:    br label [[FOR_INC_JT4]]
 ; CHECK:       sel.3.si.unfold.false.jt3:
-; CHECK-NEXT:    [[SEL_2_SI_UNFOLD_PHI_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI_JT3]], [[SEL_2_SI_UNFOLD_TRUE_JT3]] ]
+; CHECK-NEXT:    [[SEL_2_SI_UNFOLD_PHI_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI_JT3]], [[SEL_2_SI_UNFOLD_TRUE_JT3]] ], [ [[DOTSI_UNFOLD_PHI1_JT4]], [[SEL_2_SI_UNFOLD_FALSE_JT4]] ]
 ; CHECK-NEXT:    br label [[FOR_INC_JT3]]
 ; CHECK:       sel.1.si.unfold.true:
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[FOR_INC]], label [[SEL_1_SI_UNFOLD_FALSE_JT2:%.*]]
@@ -289,11 +282,6 @@ define i32 @test3(i32 %num) {
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[COUNT5]], 1
 ; CHECK-NEXT:    [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
-; CHECK:       for.inc.jt4:
-; CHECK-NEXT:    [[STATE_NEXT_JT4]] = phi i32 [ [[SEL_2_SI_UNFOLD_PHI_JT4]], [[SEL_3_SI_UNFOLD_FALSE_JT4]] ]
-; CHECK-NEXT:    [[INC_JT4]] = add nsw i32 undef, 1
-; CHECK-NEXT:    [[CMP_EXIT_JT4:%.*]] = icmp slt i32 [[INC_JT4]], [[NUM]]
-; CHECK-NEXT:    br i1 [[CMP_EXIT_JT4]], label [[FOR_BODY_JT4:%.*]], label [[FOR_END]]
 ; CHECK:       for.inc.jt3:
 ; CHECK-NEXT:    [[STATE_NEXT_JT3]] = phi i32 [ [[SEL_2_SI_UNFOLD_PHI_JT3]], [[SEL_3_SI_UNFOLD_FALSE_JT3]] ]
 ; CHECK-NEXT:    [[INC_JT3]] = add nsw i32 [[COUNT5]], 1
@@ -305,8 +293,8 @@ define i32 @test3(i32 %num) {
 ; CHECK-NEXT:    [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
 ; CHECK-NEXT:    br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
 ; CHECK:       for.inc.jt1:
-; CHECK-NEXT:    [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT4]], [[FOR_BODY_JT4]] ], [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT5]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ], [ [[COUNT]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[FOR_BODY_JT3]] ], [ 1, [[FOR_BODY_JT4]] ], [ [[DOTSI_UNFOLD_PHI2_JT1]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ]
+; CHECK-NEXT:    [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT5]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ], [ [[COUNT]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[FOR_BODY_JT3]] ], [ [[DOTSI_UNFOLD_PHI2_JT1]], [[SEL_1_SI_UNFOLD_TRUE_JT1]] ]
 ; CHECK-NEXT:    [[INC_JT1]] = add nsw i32 [[COUNT4]], 1
 ; CHECK-NEXT:    [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
 ; CHECK-NEXT:    br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
@@ -402,36 +390,28 @@ define void @pr65222(i32 %flags, i1 %cmp, i1 %tobool.not) {
 ; CHECK-NEXT:    br label [[IF_END_JT2:%.*]]
 ; CHECK:       cond1.si.unfold.true:
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_END]], label [[COND1_SI_UNFOLD_FALSE_JT1:%.*]]
-; CHECK:       cond1.si.unfold.true.jt3:
+; CHECK:       cond1.si.unfold.true.jt2:
 ; CHECK-NEXT:    [[DOTSI_UNFOLD_PHI2:%.*]] = phi i32 [ 3, [[THEN]] ]
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_END_JT3:%.*]], label [[COND1_SI_UNFOLD_FALSE:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_END_JT2]], label [[COND1_SI_UNFOLD_FALSE:%.*]]
 ; CHECK:       cond1.si.unfold.false:
 ; CHECK-NEXT:    [[DOTSI_UNFOLD_PHI3:%.*]] = phi i32 [ 1, [[COND1_SI_UNFOLD_TRUE]] ]
 ; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       cond1.si.unfold.false.jt1:
+; CHECK:       cond1.si.unfold.false.jt2:
 ; CHECK-NEXT:    [[DOTSI_UNFOLD_PHI3_JT1:%.*]] = phi i32 [ 1, [[COND1_SI_UNFOLD_TRUE1:%.*]] ]
-; CHECK-NEXT:    br label [[IF_END_JT1:%.*]]
+; CHECK-NEXT:    br label [[IF_END_JT2]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[UNFOLDED:%.*]] = phi i32 [ [[FLAGS:%.*]], [[WHILE_COND]] ], [ [[COND_SI_UNFOLD_PHI]], [[TOUNFOLD_SI_UNFOLD_FALSE1]] ], [ poison, [[COND1_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI3]], [[COND1_SI_UNFOLD_FALSE]] ]
 ; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ [[FLAGS]], [[WHILE_COND]] ], [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE1]] ], [ 0, [[COND1_SI_UNFOLD_TRUE1]] ], [ 0, [[COND1_SI_UNFOLD_FALSE]] ]
 ; CHECK-NEXT:    switch i32 [[UNFOLDED]], label [[UNREACHABLE:%.*]] [
 ; CHECK-NEXT:      i32 0, label [[SW_BB:%.*]]
 ; CHECK-NEXT:    ]
-; CHECK:       if.end.jt1:
-; CHECK-NEXT:    [[UNFOLDED_JT1:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI3_JT1]], [[COND1_SI_UNFOLD_FALSE_JT1]] ]
-; CHECK-NEXT:    [[OTHER_JT1:%.*]] = phi i32 [ 0, [[COND1_SI_UNFOLD_FALSE_JT1]] ]
-; CHECK-NEXT:    br label [[UNREACHABLE]]
-; CHECK:       if.end.jt3:
-; CHECK-NEXT:    [[UNFOLDED_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI2]], [[COND1_SI_UNFOLD_TRUE]] ]
-; CHECK-NEXT:    [[OTHER_JT3:%.*]] = phi i32 [ 0, [[COND1_SI_UNFOLD_TRUE]] ]
-; CHECK-NEXT:    br label [[UNREACHABLE]]
 ; CHECK:       if.end.jt0:
 ; CHECK-NEXT:    [[UNFOLDED_JT0:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT0]], [[TOUNFOLD_SI_UNFOLD_FALSE_JT0]] ]
 ; CHECK-NEXT:    [[OTHER_JT0:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE_JT0]] ]
 ; CHECK-NEXT:    br label [[SW_BB]]
 ; CHECK:       if.end.jt2:
-; CHECK-NEXT:    [[UNFOLDED_JT2:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT2]], [[TOUNFOLD_SI_UNFOLD_FALSE]] ]
-; CHECK-NEXT:    [[OTHER_JT2:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE]] ]
+; CHECK-NEXT:    [[UNFOLDED_JT2:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT2]], [[TOUNFOLD_SI_UNFOLD_FALSE]] ], [ [[DOTSI_UNFOLD_PHI2]], [[COND1_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI3_JT1]], [[COND1_SI_UNFOLD_FALSE_JT1]] ]
+; CHECK-NEXT:    [[OTHER_JT2:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE]] ], [ 0, [[COND1_SI_UNFOLD_TRUE]] ], [ 0, [[COND1_SI_UNFOLD_FALSE_JT1]] ]
 ; CHECK-NEXT:    br label [[UNREACHABLE]]
 ; CHECK:       unreachable:
 ; CHECK-NEXT:    unreachable
diff --git a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll
new file mode 100644
index 0000000..4555dfb
--- /dev/null
+++ b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll
@@ -0,0 +1,281 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
+
+declare void @do_something()
+declare void @user(i32)
+
+define void @equivalent_on_default(i1 %c1) {
+; CHECK-LABEL: define void @equivalent_on_default(
+; CHECK-SAME: i1 [[C1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SWITCH_BB:%.*]]
+; CHECK:       switch_bb:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [
+; CHECK-NEXT:      i32 0, label [[CASE1:%.*]]
+; CHECK-NEXT:      i32 1, label [[CASE2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       switch_bb.jt2:
+; CHECK-NEXT:    [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ]
+; CHECK-NEXT:    br label [[DEFAULT_DEST]]
+; CHECK:       switch_bb.jt1:
+; CHECK-NEXT:    [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ]
+; CHECK-NEXT:    br label [[CASE2]]
+; CHECK:       case1:
+; CHECK-NEXT:    br label [[SWITCH_BB_JT1:%.*]]
+; CHECK:       case2:
+; CHECK-NEXT:    br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]]
+; CHECK:       case2then:
+; CHECK-NEXT:    br label [[CASE2END_JT2]]
+; CHECK:       case2end:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB]]
+; CHECK:       case2end.jt2:
+; CHECK-NEXT:    [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ]
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB_JT2:%.*]]
+; CHECK:       default_dest:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  ]
+
+case1:
+  br label %switch_bb
+
+case2:
+  br i1 %c1, label %case2then, label %case2end
+
+case2then:
+  br label %case2end
+
+case2end:
+  %phi_case2 = phi i32 [ 2, %case2 ] , [ 3, %case2then ]
+  call void @do_something()
+  br label %switch_bb
+
+default_dest:
+  ret void
+}
+
+define void @equivalent_on_default_user(i1 %c1) {
+; CHECK-LABEL: define void @equivalent_on_default_user(
+; CHECK-SAME: i1 [[C1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SWITCH_BB:%.*]]
+; CHECK:       switch_bb:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [
+; CHECK-NEXT:      i32 0, label [[CASE1:%.*]]
+; CHECK-NEXT:      i32 1, label [[CASE2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       switch_bb.jt2:
+; CHECK-NEXT:    [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ]
+; CHECK-NEXT:    br label [[DEFAULT_DEST]]
+; CHECK:       switch_bb.jt1:
+; CHECK-NEXT:    [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ]
+; CHECK-NEXT:    br label [[CASE2]]
+; CHECK:       case1:
+; CHECK-NEXT:    br label [[SWITCH_BB_JT1:%.*]]
+; CHECK:       case2:
+; CHECK-NEXT:    br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]]
+; CHECK:       case2then:
+; CHECK-NEXT:    br label [[CASE2END_JT2]]
+; CHECK:       case2end:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @user(i32 poison)
+; CHECK-NEXT:    br label [[SWITCH_BB]]
+; CHECK:       case2end.jt2:
+; CHECK-NEXT:    [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ]
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @user(i32 [[PHI_CASE2_JT2]])
+; CHECK-NEXT:    br label [[SWITCH_BB_JT2:%.*]]
+; CHECK:       default_dest:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  ]
+
+case1:
+  br label %switch_bb
+
+case2:
+  br i1 %c1, label %case2then, label %case2end
+
+case2then:
+  br label %case2end
+
+case2end:
+  %phi_case2 = phi i32 [ 2, %case2 ] , [ 3, %case2then ]
+  call void @do_something()
+  call void @user(i32 %phi_case2)
+  br label %switch_bb
+
+default_dest:
+  ret void
+}
+
+define void @equivalent_only_cases(i1 %c1) {
+; CHECK-LABEL: define void @equivalent_only_cases(
+; CHECK-SAME: i1 [[C1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SWITCH_BB:%.*]]
+; CHECK:       switch_bb:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [
+; CHECK-NEXT:      i32 0, label [[CASE1:%.*]]
+; CHECK-NEXT:      i32 1, label [[CASE2:%.*]]
+; CHECK-NEXT:      i32 2, label [[CASE1]]
+; CHECK-NEXT:      i32 3, label [[CASE1]]
+; CHECK-NEXT:    ]
+; CHECK:       switch_bb.jt2:
+; CHECK-NEXT:    [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ]
+; CHECK-NEXT:    br label [[CASE1]]
+; CHECK:       switch_bb.jt1:
+; CHECK-NEXT:    [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ]
+; CHECK-NEXT:    br label [[CASE2]]
+; CHECK:       case1:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB_JT1:%.*]]
+; CHECK:       case2:
+; CHECK-NEXT:    br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]]
+; CHECK:       case2then:
+; CHECK-NEXT:    br label [[CASE2END_JT2]]
+; CHECK:       case2end:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB]]
+; CHECK:       case2end.jt2:
+; CHECK-NEXT:    [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ]
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB_JT2:%.*]]
+; CHECK:       default_dest:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  i32 2, label %case1
+  i32 3, label %case1
+  ]
+
+case1:
+  call void @do_something()
+  br label %switch_bb
+
+case2:
+  br i1 %c1, label %case2then, label %case2end
+
+case2then:
+  br label %case2end
+
+case2end:
+  %phi_case2 = phi i32 [ 2, %case2 ] , [ 3, %case2then ]
+  call void @do_something()
+  br label %switch_bb
+
+default_dest:
+  ret void
+}
+
+define void @equivalent_both_case_and_default(i1 %c1, i1 %c2) {
+; CHECK-LABEL: define void @equivalent_both_case_and_default(
+; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SWITCH_BB:%.*]]
+; CHECK:       switch_bb:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ poison, [[CASE2END:%.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label [[DEFAULT_DEST:%.*]] [
+; CHECK-NEXT:      i32 0, label [[CASE1:%.*]]
+; CHECK-NEXT:      i32 1, label [[CASE2:%.*]]
+; CHECK-NEXT:      i32 2, label [[CASE1]]
+; CHECK-NEXT:      i32 3, label [[CASE1]]
+; CHECK-NEXT:    ]
+; CHECK:       switch_bb.jt4:
+; CHECK-NEXT:    [[PHI_JT3:%.*]] = phi i32 [ [[PHI_CASE2_JT3:%.*]], [[CASE2END_JT3:%.*]] ]
+; CHECK-NEXT:    br label [[DEFAULT_DEST]]
+; CHECK:       switch_bb.jt2:
+; CHECK-NEXT:    [[PHI_JT2:%.*]] = phi i32 [ [[PHI_CASE2_JT2:%.*]], [[CASE2END_JT2:%.*]] ]
+; CHECK-NEXT:    br label [[CASE1]]
+; CHECK:       switch_bb.jt1:
+; CHECK-NEXT:    [[PHI_JT1:%.*]] = phi i32 [ 1, [[CASE1]] ]
+; CHECK-NEXT:    br label [[CASE2]]
+; CHECK:       case1:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB_JT1:%.*]]
+; CHECK:       case2:
+; CHECK-NEXT:    br i1 [[C1]], label [[CASE2THEN:%.*]], label [[CASE2END_JT2]]
+; CHECK:       case2then:
+; CHECK-NEXT:    br i1 [[C2]], label [[CASE2THEN2:%.*]], label [[CASE2END_JT2]]
+; CHECK:       case2then2:
+; CHECK-NEXT:    br i1 [[C2]], label [[CASE2THEN3:%.*]], label [[CASE2END_JT3]]
+; CHECK:       case2then3:
+; CHECK-NEXT:    br label [[CASE2END_JT3]]
+; CHECK:       case2end:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB]]
+; CHECK:       case2end.jt4:
+; CHECK-NEXT:    [[PHI_CASE2_JT3]] = phi i32 [ 4, [[CASE2THEN2]] ], [ 5, [[CASE2THEN3]] ]
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB_JT3:%.*]]
+; CHECK:       case2end.jt2:
+; CHECK-NEXT:    [[PHI_CASE2_JT2]] = phi i32 [ 2, [[CASE2]] ], [ 3, [[CASE2THEN]] ]
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label [[SWITCH_BB_JT2:%.*]]
+; CHECK:       default_dest:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ 1, %case1 ], [ %phi_case2, %case2end ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  i32 2, label %case1
+  i32 3, label %case1
+  ]
+
+case1:
+  call void @do_something()
+  br label %switch_bb
+
+case2:
+  br i1 %c1, label %case2then, label %case2end
+
+case2then:
+  br i1 %c2, label %case2then2, label %case2end
+
+case2then2:
+  br i1 %c2, label %case2then3, label %case2end
+
+case2then3:
+  br label %case2end
+
+case2end:
+  %phi_case2 = phi i32 [ 2, %case2 ], [ 3, %case2then ], [ 4, %case2then2 ], [ 5, %case2then3 ]
+  call void @do_something()
+  br label %switch_bb
+
+default_dest:
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
index ebea5bf..d88eaf8 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
@@ -1,8 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 declare i1 @gen1()
 
+;.
+; CHECK: @glb = global i8 0
+;.
 define i1 @cond_eq_and(i8 %X, i8 %Y, i8 noundef %C) {
 ; CHECK-LABEL: @cond_eq_and(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[X:%.*]], [[C:%.*]]
@@ -16,16 +19,16 @@ define i1 @cond_eq_and(i8 %X, i8 %Y, i8 noundef %C) {
   ret i1 %res
 }
 
-define i1 @cond_eq_and_const(i8 %X, i8 %Y) {
+define i1 @cond_eq_and_const(i8 %X, i8 %Y) !prof !0 {
 ; CHECK-LABEL: @cond_eq_and_const(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[X:%.*]], 10
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[Y:%.*]], 10
-; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 [[TMP1]], i1 false
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 [[TMP1]], i1 false, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
   %cond = icmp eq i8 %X, 10
   %lhs = icmp ult i8 %X, %Y
-  %res = select i1 %cond, i1 %lhs, i1 false
+  %res = select i1 %cond, i1 %lhs, i1 false, !prof !1
   ret i1 %res
 }
 
@@ -42,16 +45,16 @@ define i1 @cond_eq_or(i8 %X, i8 %Y, i8 noundef %C) {
   ret i1 %res
 }
 
-define i1 @cond_eq_or_const(i8 %X, i8 %Y) {
+define i1 @cond_eq_or_const(i8 %X, i8 %Y) !prof !0 {
 ; CHECK-LABEL: @cond_eq_or_const(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i8 [[X:%.*]], 10
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[Y:%.*]], 10
-; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 true, i1 [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 true, i1 [[TMP1]], !prof [[PROF1]]
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
   %cond = icmp ne i8 %X, 10
   %lhs = icmp ult i8 %X, %Y
-  %res = select i1 %cond, i1 true, i1 %lhs
+  %res = select i1 %cond, i1 true, i1 %lhs, !prof !1
   ret i1 %res
 }
 
@@ -793,3 +796,10 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) {
   %or = select <2 x i1> %and, <2 x i1> <i1 true, i1 true>, <2 x i1> %implied
   ret <2 x i1> %or
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
index 0fc3c19..a43e762 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
@@ -401,4 +401,27 @@ b:
   ret i32 %1
 }
 
+define i32 @else_will_be_unreachable(i1 %arg) {
+; CHECK-LABEL: @else_will_be_unreachable(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = select i1 [[ARG:%.*]], i32 0, i32 1
+; CHECK-NEXT:    ret i32 [[I]]
+;
+entry:
+  switch i1 %arg, label %else [
+  i1 false, label %if
+  i1 true, label %if
+  ]
+
+if:
+  br i1 %arg, label %else, label %bb
+
+bb:
+  br label %else
+
+else:
+  %i = phi i32 [ 0, %entry ], [ 0, %if ], [ 1, %bb ]
+  ret i32 %i
+}
+
 declare void @bar(ptr nonnull dereferenceable(4))
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s
index e1e9b57..64e3ed9 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vector-fp.s
@@ -2323,13 +2323,13 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w	v8, v16
 # CHECK-NEXT:  1      3     1.00                  U      1     VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI            vsetvli	zero, zero, e64, m1, tu, mu
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
 # CHECK-NEXT:  1      228   228.00                       228   VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv	v8, v16, v24
 # CHECK-NEXT:  1      228   228.00                       228   VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf	v8, v16, fs0
 # CHECK-NEXT:  1      228   228.00                       228   VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf	v8, v16, fs0
@@ -2352,22 +2352,22 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      228   228.00                       228   VLEN1024X300SiFive7VA1[1,229],VLEN1024X300SiFive7VA1OrVA2[1,229],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v	v8, v24
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v	v8, v24
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v	v8, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      19    16.00                        19    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v	v8, v16
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
-# CHECK-NEXT:  1      16    16.00                        16    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      23    16.00                        23    VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     2.00                         8     VLEN1024X300SiFive7VA1[1,3],VLEN1024X300SiFive7VA1OrVA2[1,3],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v	v8, v16
@@ -2384,13 +2384,13 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w	v8, v16
 # CHECK-NEXT:  1      3     1.00                  U      1     VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI            vsetvli	zero, zero, e64, m2, tu, mu
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
 # CHECK-NEXT:  1      456   456.00                       456   VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv	v8, v16, v24
 # CHECK-NEXT:  1      456   456.00                       456   VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf	v8, v16, fs0
 # CHECK-NEXT:  1      456   456.00                       456   VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf	v8, v16, fs0
@@ -2413,22 +2413,22 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      456   456.00                       456   VLEN1024X300SiFive7VA1[1,457],VLEN1024X300SiFive7VA1OrVA2[1,457],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v	v8, v24
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v	v8, v24
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v	v8, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      35    32.00                        35    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v	v8, v16
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
-# CHECK-NEXT:  1      32    32.00                        32    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      39    32.00                        39    VLEN1024X300SiFive7VA1[1,33],VLEN1024X300SiFive7VA1OrVA2[1,33],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     4.00                         8     VLEN1024X300SiFive7VA1[1,5],VLEN1024X300SiFive7VA1OrVA2[1,5],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v	v8, v16
@@ -2445,13 +2445,13 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w	v8, v16
 # CHECK-NEXT:  1      3     1.00                  U      1     VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI            vsetvli	zero, zero, e64, m4, tu, mu
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
 # CHECK-NEXT:  1      912   912.00                       912   VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv	v8, v16, v24
 # CHECK-NEXT:  1      912   912.00                       912   VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf	v8, v16, fs0
 # CHECK-NEXT:  1      912   912.00                       912   VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf	v8, v16, fs0
@@ -2474,22 +2474,22 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      912   912.00                       912   VLEN1024X300SiFive7VA1[1,913],VLEN1024X300SiFive7VA1OrVA2[1,913],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v	v8, v24
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v	v8, v24
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v	v8, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      67    64.00                        67    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v	v8, v16
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
-# CHECK-NEXT:  1      64    64.00                        64    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      71    64.00                        71    VLEN1024X300SiFive7VA1[1,65],VLEN1024X300SiFive7VA1OrVA2[1,65],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v	v8, v16
@@ -2506,13 +2506,13 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_F_F_W vfncvt.f.f.w	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFNCVT_ROD_F_F_W vfncvt.rod.f.f.w	v8, v16
 # CHECK-NEXT:  1      3     1.00                  U      1     VLEN1024X300SiFive7PipeA,VLEN1024X300SiFive7PipeAB VSETVLI            vsetvli	zero, zero, e64, m8, tu, mu
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VV vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFADD_VF vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VV vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSUB_VF vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFRSUB_VF vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VV vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMUL_VF vfmul.vf	v8, v16, fs0
 # CHECK-NEXT:  1      1824  1824.00                      1824  VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFDIV_VV vfdiv.vv	v8, v16, v24
 # CHECK-NEXT:  1      1824  1824.00                      1824  VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFDIV_VF vfdiv.vf	v8, v16, fs0
 # CHECK-NEXT:  1      1824  1824.00                      1824  VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFRDIV_VF vfrdiv.vf	v8, v16, fs0
@@ -2535,22 +2535,22 @@ vfncvt.rod.f.f.w v8, v16
 # CHECK-NEXT:  1      1824  1824.00                      1824  VLEN1024X300SiFive7VA1[1,1825],VLEN1024X300SiFive7VA1OrVA2[1,1825],VLEN1024X300SiFive7VCQ VFSQRT_V vfsqrt.v	v8, v24
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFRSQRT7_V vfrsqrt7.v	v8, v24
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFREC7_V vfrec7.v	v8, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VV vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMIN_VF vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VV vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFMAX_VF vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VV vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJ_VF vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VV vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJN_VF vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VV vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      131   128.00                       131   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFSGNJX_VF vfsgnjx.vf	v8, v16, fs0
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_XU_F_V vfcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_X_F_V vfcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_RTZ_XU_F_V vfcvt.rtz.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     16.00                        8     VLEN1024X300SiFive7VA1[1,17],VLEN1024X300SiFive7VA1OrVA2[1,17],VLEN1024X300SiFive7VCQ VFCVT_RTZ_X_F_V vfcvt.rtz.x.f.v	v8, v16
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
-# CHECK-NEXT:  1      128   128.00                       128   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_XU_V vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      135   128.00                       135   VLEN1024X300SiFive7VA1[1,129],VLEN1024X300SiFive7VA1OrVA2[1,129],VLEN1024X300SiFive7VCQ VFCVT_F_X_V vfcvt.f.x.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_XU_F_V vfwcvt.xu.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_X_F_V vfwcvt.x.f.v	v8, v16
 # CHECK-NEXT:  1      8     8.00                         8     VLEN1024X300SiFive7VA1[1,9],VLEN1024X300SiFive7VA1OrVA2[1,9],VLEN1024X300SiFive7VCQ VFWCVT_RTZ_XU_F_V vfwcvt.rtz.xu.f.v	v8, v16
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index 434449c..1031932 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -253,25 +253,17 @@ public:
       break;
     }
     case BasicBlockLevel: {
-      const auto &BBVecMap = Emb->getBBVecMap();
       for (const BasicBlock &BB : F) {
-        auto It = BBVecMap.find(&BB);
-        if (It != BBVecMap.end()) {
-          OS << BB.getName() << ":";
-          It->second.print(OS);
-        }
+        OS << BB.getName() << ":";
+        Emb->getBBVector(BB).print(OS);
       }
       break;
     }
     case InstructionLevel: {
-      const auto &InstMap = Emb->getInstVecMap();
       for (const BasicBlock &BB : F) {
         for (const Instruction &I : BB) {
-          auto It = InstMap.find(&I);
-          if (It != InstMap.end()) {
-            I.print(OS);
-            It->second.print(OS);
-          }
+          I.print(OS);
+          Emb->getInstVector(I).print(OS);
         }
       }
       break;
diff --git a/llvm/unittests/ADT/BitsetTest.cpp b/llvm/unittests/ADT/BitsetTest.cpp
new file mode 100644
index 0000000..8877397
--- /dev/null
+++ b/llvm/unittests/ADT/BitsetTest.cpp
@@ -0,0 +1,44 @@
+//===- llvm/unittest/Support/BitsetTest.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Bitset.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+template <unsigned NumBits>
+class TestBitsetUInt64Array : public Bitset<NumBits> {
+  static constexpr unsigned NumElts = (NumBits + 63) / 64;
+
+public:
+  TestBitsetUInt64Array(const std::array<uint64_t, NumElts> &B)
+      : Bitset<NumBits>(B) {}
+
+  bool verifyValue(const std::array<uint64_t, NumElts> &B) const {
+    for (unsigned I = 0; I != NumBits; ++I) {
+      bool ReferenceVal =
+          (B[(I / 64)] & (static_cast<uint64_t>(1) << (I % 64))) != 0;
+      if (ReferenceVal != this->test(I))
+        return false;
+    }
+
+    return true;
+  }
+};
+
+TEST(BitsetTest, Construction) {
+  std::array<uint64_t, 2> TestVals = {0x123456789abcdef3, 0x1337d3a0b22c24};
+  TestBitsetUInt64Array<96> Test(TestVals);
+  EXPECT_TRUE(Test.verifyValue(TestVals));
+
+  TestBitsetUInt64Array<65> Test1(TestVals);
+  EXPECT_TRUE(Test1.verifyValue(TestVals));
+}
+} // namespace
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index dafd735..848ccba 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_unittest(ADTTests
   BitFieldsTest.cpp
   BitmaskEnumTest.cpp
   BitTest.cpp
+  BitsetTest.cpp
   BitVectorTest.cpp
   BreadthFirstIteratorTest.cpp
   BumpPtrListTest.cpp
diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp
index 40b4aa2..8ffc5f6 100644
--- a/llvm/unittests/Analysis/IR2VecTest.cpp
+++ b/llvm/unittests/Analysis/IR2VecTest.cpp
@@ -30,7 +30,9 @@ namespace {
 class TestableEmbedder : public Embedder {
 public:
   TestableEmbedder(const Function &F, const Vocabulary &V) : Embedder(F, V) {}
-  void computeEmbeddings(const BasicBlock &BB) const override {}
+  Embedding computeEmbeddings(const Instruction &I) const override {
+    return Embedding();
+  }
 };
 
 TEST(EmbeddingTest, ConstructorsAndAccessors) {
@@ -321,18 +323,12 @@ protected:
   }
 };
 
-TEST_F(IR2VecTestFixture, GetInstVecMap_Symbolic) {
+TEST_F(IR2VecTestFixture, GetInstVec_Symbolic) {
   auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));
 
-  const auto &InstMap = Emb->getInstVecMap();
-
-  EXPECT_EQ(InstMap.size(), 2u);
-  EXPECT_TRUE(InstMap.count(AddInst));
-  EXPECT_TRUE(InstMap.count(RetInst));
-
-  const auto &AddEmb = InstMap.at(AddInst);
-  const auto &RetEmb = InstMap.at(RetInst);
+  const auto &AddEmb = Emb->getInstVector(*AddInst);
+  const auto &RetEmb = Emb->getInstVector(*RetInst);
   EXPECT_EQ(AddEmb.size(), 2u);
   EXPECT_EQ(RetEmb.size(), 2u);
 
@@ -340,51 +336,17 @@ TEST_F(IR2VecTestFixture, GetInstVecMap_Symbolic) {
   EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(2, 15.5)));
 }
 
-TEST_F(IR2VecTestFixture, GetInstVecMap_FlowAware) {
-  auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V);
-  ASSERT_TRUE(static_cast<bool>(Emb));
-
-  const auto &InstMap = Emb->getInstVecMap();
-
-  EXPECT_EQ(InstMap.size(), 2u);
-  EXPECT_TRUE(InstMap.count(AddInst));
-  EXPECT_TRUE(InstMap.count(RetInst));
-
-  EXPECT_EQ(InstMap.at(AddInst).size(), 2u);
-  EXPECT_EQ(InstMap.at(RetInst).size(), 2u);
-
-  EXPECT_TRUE(InstMap.at(AddInst).approximatelyEquals(Embedding(2, 25.5)));
-  EXPECT_TRUE(InstMap.at(RetInst).approximatelyEquals(Embedding(2, 32.6)));
-}
-
-TEST_F(IR2VecTestFixture, GetBBVecMap_Symbolic) {
-  auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V);
-  ASSERT_TRUE(static_cast<bool>(Emb));
-
-  const auto &BBMap = Emb->getBBVecMap();
-
-  EXPECT_EQ(BBMap.size(), 1u);
-  EXPECT_TRUE(BBMap.count(BB));
-  EXPECT_EQ(BBMap.at(BB).size(), 2u);
-
-  // BB vector should be sum of add and ret: {25.5, 25.5} + {15.5, 15.5} =
-  // {41.0, 41.0}
-  EXPECT_TRUE(BBMap.at(BB).approximatelyEquals(Embedding(2, 41.0)));
-}
-
-TEST_F(IR2VecTestFixture, GetBBVecMap_FlowAware) {
+TEST_F(IR2VecTestFixture, GetInstVec_FlowAware) {
   auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));
 
-  const auto &BBMap = Emb->getBBVecMap();
-
-  EXPECT_EQ(BBMap.size(), 1u);
-  EXPECT_TRUE(BBMap.count(BB));
-  EXPECT_EQ(BBMap.at(BB).size(), 2u);
+  const auto &AddEmb = Emb->getInstVector(*AddInst);
+  const auto &RetEmb = Emb->getInstVector(*RetInst);
+  EXPECT_EQ(AddEmb.size(), 2u);
+  EXPECT_EQ(RetEmb.size(), 2u);
 
-  // BB vector should be sum of add and ret: {25.5, 25.5} + {32.6, 32.6} =
-  // {58.1, 58.1}
-  EXPECT_TRUE(BBMap.at(BB).approximatelyEquals(Embedding(2, 58.1)));
+  EXPECT_TRUE(AddEmb.approximatelyEquals(Embedding(2, 25.5)));
+  EXPECT_TRUE(RetEmb.approximatelyEquals(Embedding(2, 32.6)));
 }
 
 TEST_F(IR2VecTestFixture, GetBBVector_Symbolic) {
@@ -394,6 +356,8 @@ TEST_F(IR2VecTestFixture, GetBBVector_Symbolic) {
   const auto &BBVec = Emb->getBBVector(*BB);
 
   EXPECT_EQ(BBVec.size(), 2u);
+  // BB vector should be sum of add and ret: {25.5, 25.5} + {15.5, 15.5} =
+  // {41.0, 41.0}
   EXPECT_TRUE(BBVec.approximatelyEquals(Embedding(2, 41.0)));
 }
 
@@ -404,6 +368,8 @@ TEST_F(IR2VecTestFixture, GetBBVector_FlowAware) {
   const auto &BBVec = Emb->getBBVector(*BB);
 
   EXPECT_EQ(BBVec.size(), 2u);
+  // BB vector should be sum of add and ret: {25.5, 25.5} + {32.6, 32.6} =
+  // {58.1, 58.1}
   EXPECT_TRUE(BBVec.approximatelyEquals(Embedding(2, 58.1)));
 }
 
@@ -446,15 +412,9 @@ TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_Symbolic) {
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec3));
   EXPECT_TRUE(FuncVec2.approximatelyEquals(FuncVec3));
 
-  // Also check that instruction vectors remain consistent
-  const auto &InstMap1 = Emb->getInstVecMap();
-  const auto &InstMap2 = Emb->getInstVecMap();
-
-  EXPECT_EQ(InstMap1.size(), InstMap2.size());
-  for (const auto &[Inst, Vec1] : InstMap1) {
-    ASSERT_TRUE(InstMap2.count(Inst));
-    EXPECT_TRUE(Vec1.approximatelyEquals(InstMap2.at(Inst)));
-  }
+  Emb->invalidateEmbeddings();
+  const auto &FuncVec4 = Emb->getFunctionVector();
+  EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec4));
 }
 
 TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_FlowAware) {
@@ -473,15 +433,9 @@ TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_FlowAware) {
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec3));
   EXPECT_TRUE(FuncVec2.approximatelyEquals(FuncVec3));
 
-  // Also check that instruction vectors remain consistent
-  const auto &InstMap1 = Emb->getInstVecMap();
-  const auto &InstMap2 = Emb->getInstVecMap();
-
-  EXPECT_EQ(InstMap1.size(), InstMap2.size());
-  for (const auto &[Inst, Vec1] : InstMap1) {
-    ASSERT_TRUE(InstMap2.count(Inst));
-    EXPECT_TRUE(Vec1.approximatelyEquals(InstMap2.at(Inst)));
-  }
+  Emb->invalidateEmbeddings();
+  const auto &FuncVec4 = Emb->getFunctionVector();
+  EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec4));
 }
 
 static constexpr unsigned MaxOpcodes = Vocabulary::MaxOpcodes;
diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp
index 255f62d..1436f0f 100644
--- a/llvm/unittests/IR/ConstantFPRangeTest.cpp
+++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp
@@ -767,4 +767,39 @@ TEST_F(ConstantFPRangeTest, makeExactFCmpRegion) {
   }
 }
 
+TEST_F(ConstantFPRangeTest, abs) {
+  EXPECT_EQ(Full.abs(),
+            ConstantFPRange(APFloat::getZero(Sem, /*Negative=*/false),
+                            APFloat::getInf(Sem, /*Negative=*/false),
+                            /*MayBeQNaN=*/true,
+                            /*MayBeSNaN=*/true));
+  EXPECT_EQ(Empty.abs(), Empty);
+  EXPECT_EQ(Zero.abs(), PosZero);
+  EXPECT_EQ(PosInf.abs(), PosInf);
+  EXPECT_EQ(NegInf.abs(), PosInf);
+  EXPECT_EQ(Some.abs(), SomePos);
+  EXPECT_EQ(SomeNeg.abs(), SomePos);
+  EXPECT_EQ(NaN.abs(), NaN);
+  EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0)).abs(),
+            ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(3.0)));
+  EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(2.0)).abs(),
+            ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(3.0)));
+}
+
+TEST_F(ConstantFPRangeTest, negate) {
+  EXPECT_EQ(Full.negate(), Full);
+  EXPECT_EQ(Empty.negate(), Empty);
+  EXPECT_EQ(Zero.negate(), Zero);
+  EXPECT_EQ(PosInf.negate(), NegInf);
+  EXPECT_EQ(NegInf.negate(), PosInf);
+  EXPECT_EQ(Some.negate(), Some);
+  EXPECT_EQ(SomePos.negate(), SomeNeg);
+  EXPECT_EQ(SomeNeg.negate(), SomePos);
+  EXPECT_EQ(NaN.negate(), NaN);
+  EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0)).negate(),
+            ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(2.0)));
+  EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(2.0)).negate(),
+            ConstantFPRange::getNonNaN(APFloat(-2.0), APFloat(3.0)));
+}
+
 } // anonymous namespace
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
index 92e596e..8d19d30 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
@@ -21,6 +21,7 @@ unittest("ADTTests") {
     "BitTest.cpp",
     "BitVectorTest.cpp",
     "BitmaskEnumTest.cpp",
+    "BitsetTest.cpp",
     "BreadthFirstIteratorTest.cpp",
     "BumpPtrListTest.cpp",
     "CoalescingBitVectorTest.cpp",
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index db1b7e3..6925cec 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -292,18 +292,74 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
   let assemblyFormat = "attr-dict";
 }
 
+def ROCDLBufferLDS : LLVM_PointerInAddressSpace<3>;
+
+def ROCDL_BarrierInitOp : ROCDL_IntrOp<"s.barrier.init", [], [], [], 0, 0, 0, 0, [1], ["id"]>,
+  Arguments<(ins Arg<ROCDLBufferLDS, "", []>:$ptr, I32Attr:$id)> {
+  let description = [{
+    Available on gfx1250+.
+  }];
+  let results = (outs);
+  let assemblyFormat = "$ptr `,` $id attr-dict";
+}
+
 def ROCDL_BarrierSignalOp : ROCDL_ConcreteNonMemIntrOp<"s.barrier.signal", [], 0, [0], ["id"]>,
   Arguments<(ins I32Attr:$id)> {
   let results = (outs);
   let assemblyFormat = "$id attr-dict";
 }
 
+def ROCDL_BarrierSignalVarOp : ROCDL_IntrOp<"s.barrier.signal.var", [], [], [], 0, 0, 0, 0, [1], ["id"]>,
+  Arguments<(ins Arg<ROCDLBufferLDS, "", []>:$ptr, I32Attr:$id)> {
+  let description = [{
+    Available on gfx1250+.
+  }];
+  let results = (outs);
+  let assemblyFormat = "$ptr `,` $id attr-dict";
+}
+
+def ROCDL_BarrierJoinOp : ROCDL_IntrOp<"s.barrier.join", [], [], [], 0>,
+  Arguments<(ins Arg<ROCDLBufferLDS, "", []>:$ptr)> {
+  let description = [{
+    Available on gfx1250+.
+  }];
+  let results = (outs);
+  let assemblyFormat = "$ptr attr-dict";
+}
+
+def ROCDL_BarrierLeaveOp : ROCDL_ConcreteNonMemIntrOp<"s.barrier.leave", [], 0, [0], ["id"]>,
+  Arguments<(ins I16Attr:$id)> {
+  let description = [{
+    Available on gfx1250+.
+  }];
+  let results = (outs);
+  let assemblyFormat = "$id attr-dict";
+}
+
 def ROCDL_BarrierWaitOp : ROCDL_ConcreteNonMemIntrOp<"s.barrier.wait", [], 0, [0], ["id"]>,
   Arguments<(ins I16Attr:$id)> {
   let results = (outs);
   let assemblyFormat = "$id attr-dict";
 }
 
+def ROCDL_BarrierSignalIsfirstOp : ROCDL_ConcreteNonMemIntrOp<"s.barrier.signal.isfirst", [], 1, [0], ["id"]>,
+  Arguments<(ins I32Attr:$id)> {
+  let description = [{
+    Available on gfx1250+.
+  }];
+  let results = (outs I1:$res);
+  let assemblyFormat = "$id attr-dict `:` type($res)";
+}
+
+def ROCDL_GetBarrierStateOp : ROCDL_ConcreteNonMemIntrOp<"s.get.barrier.state", [], 1, [0], ["id"]>,
+  Arguments<(ins I32Attr:$id)> {
+  let description = [{
+    Available on gfx1250+.
+  }];
+  let results = (outs I32:$res);
+  let assemblyFormat = "$id attr-dict `:` type($res)";
+}
+
 def ROCDL_WaitDscntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.dscnt", [], 0, [0], ["count"]>,
   Arguments<(ins I16Attr:$count)> {
   let summary = "Wait until DSCNT is less than or equal to `count`";
@@ -497,7 +553,6 @@ def ROCDL_wmma_i32_16x16x32_iu4 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x32.iu4", [1]
 // LDS transpose intrinsics (available in GFX950)
 
 def ROCDLGlobalBuffer : LLVM_PointerInAddressSpace<1>;
-def ROCDLBufferLDS : LLVM_PointerInAddressSpace<3>;
 
 class ROCDL_LDS_Read_Tr_IntrOp<string mnemonic> :
   ROCDL_IntrOp<mnemonic, [1], [], [], 1, 0, 1> {
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index a88b59a..358bd33 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -951,6 +951,13 @@ llvm.func @rocdl.s.barrier() {
   llvm.return
 }
 
+llvm.func @rocdl.s.barrier.init(%ptr : !llvm.ptr<3>) {
+  // CHECK-LABEL: rocdl.s.barrier.init
+  // CHECK: rocdl.s.barrier.init %[[PTR:.+]], 1
+  rocdl.s.barrier.init %ptr, 1
+  llvm.return
+}
+
 llvm.func @rocdl.s.barrier.signal() {
   // CHECK-LABEL: rocdl.s.barrier.signal
   // CHECK: rocdl.s.barrier.signal -1
@@ -958,6 +965,27 @@ llvm.func @rocdl.s.barrier.signal() {
   llvm.return
 }
 
+llvm.func @rocdl.s.barrier.signal.var(%ptr : !llvm.ptr<3>) {
+  // CHECK-LABEL: rocdl.s.barrier.signal.var
+  // CHECK: rocdl.s.barrier.signal.var %[[PTR:.+]], 1
+  rocdl.s.barrier.signal.var %ptr, 1
+  llvm.return
+}
+
+llvm.func @rocdl.s.barrier.join(%ptr : !llvm.ptr<3>) {
+  // CHECK-LABEL: rocdl.s.barrier.join
+  // CHECK: rocdl.s.barrier.join %[[PTR:.+]]
+  rocdl.s.barrier.join %ptr
+  llvm.return
+}
+
+llvm.func @rocdl.s.barrier.leave() {
+  // CHECK-LABEL: rocdl.s.barrier.leave
+  // CHECK: rocdl.s.barrier.leave 1
+  rocdl.s.barrier.leave 1
+  llvm.return
+}
+
 llvm.func @rocdl.s.barrier.wait() {
   // CHECK-LABEL: rocdl.s.barrier.wait
   // CHECK: rocdl.s.barrier.wait -1
@@ -965,6 +993,20 @@ llvm.func @rocdl.s.barrier.wait() {
   llvm.return
 }
 
+llvm.func @rocdl.s.barrier.signal.isfirst() {
+  // CHECK-LABEL: rocdl.s.barrier.signal.isfirst
+  // CHECK: rocdl.s.barrier.signal.isfirst 1
+  %0 = rocdl.s.barrier.signal.isfirst 1 : i1
+  llvm.return
+}
+
+llvm.func @rocdl.s.get.barrier.state() {
+  // CHECK-LABEL: rocdl.s.get.barrier.state
+  // CHECK: rocdl.s.get.barrier.state 1
+  %0 = rocdl.s.get.barrier.state 1 : i32
+  llvm.return
+}
+
 llvm.func @rocdl.s.wait.dscnt() {
   // CHECK-LABEL: rocdl.s.wait.dscnt
   // CHECK: rocdl.s.wait.dscnt 0
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 1c0c2eb..fdd2c91 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -192,6 +192,13 @@ llvm.func @rocdl.barrier() {
   llvm.return
 }
 
+llvm.func @rocdl.s.barrier.init(%ptr : !llvm.ptr<3>) {
+  // CHECK-LABEL: rocdl.s.barrier.init
+  // CHECK: call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %[[PTR:.+]], i32 1)
+  rocdl.s.barrier.init %ptr, 1
+  llvm.return
+}
+
 llvm.func @rocdl.s.barrier.signal() {
   // CHECK-LABEL: rocdl.s.barrier.signal
   // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
@@ -199,6 +206,27 @@ llvm.func @rocdl.s.barrier.signal() {
   llvm.return
 }
 
+llvm.func @rocdl.s.barrier.signal.var(%ptr : !llvm.ptr<3>) {
+  // CHECK-LABEL: rocdl.s.barrier.signal.var
+  // CHECK: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %[[PTR:.+]], i32 1)
+  rocdl.s.barrier.signal.var %ptr, 1
+  llvm.return
+}
+
+llvm.func @rocdl.s.barrier.join(%ptr : !llvm.ptr<3>) {
+  // CHECK-LABEL: rocdl.s.barrier.join
+  // CHECK: call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %[[PTR:.+]])
+  rocdl.s.barrier.join %ptr
+  llvm.return
+}
+
+llvm.func @rocdl.s.barrier.leave() {
+  // CHECK-LABEL: rocdl.s.barrier.leave
+  // CHECK: call void @llvm.amdgcn.s.barrier.leave(i16 1)
+  rocdl.s.barrier.leave 1
+  llvm.return
+}
+
 llvm.func @rocdl.s.barrier.wait() {
   // CHECK-LABEL: rocdl.s.barrier.wait
   // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
@@ -206,6 +234,20 @@ llvm.func @rocdl.s.barrier.wait() {
   llvm.return
 }
 
+llvm.func @rocdl.s.barrier.signal.isfirst() {
+  // CHECK-LABEL: rocdl.s.barrier.signal.isfirst
+  // CHECK:  %[[OUT:.+]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 1)
+  %0 = rocdl.s.barrier.signal.isfirst 1 : i1
+  llvm.return
+}
+
+llvm.func @rocdl.s.get.barrier.state() {
+  // CHECK-LABEL: rocdl.s.get.barrier.state
+  // CHECK: %[[STATE:.+]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 1)
+  %0 = rocdl.s.get.barrier.state 1 : i32
+  llvm.return
+}
+
 llvm.func @rocdl.s.wait.dscnt() {
   // CHECK-LABEL: rocdl.s.wait.dscnt
   // CHECK-NEXT: call void @llvm.amdgcn.s.wait.dscnt(i16 0)
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 6ea27187..6329d61 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -1169,6 +1169,11 @@ def OpP : TEST_Op<"op_p"> {
   let results = (outs I32);
 }
 
+def OpQ : TEST_Op<"op_q"> {
+  let arguments = (ins AnyType, AnyType);
+  let results = (outs AnyType);
+}
+
 // Test constant-folding a pattern that maps `(F32) -> SI32`.
 def SignOp : TEST_Op<"sign", [SameOperandsAndResultShape]> {
   let arguments = (ins RankedTensorOf<[F32]>:$operand);
@@ -1207,6 +1212,14 @@ def TestNestedSameOpAndSameArgEqualityPattern :
 def TestMultipleEqualArgsPattern :
   Pat<(OpP $a, $b, $a, $a, $b, $c), (OpN $c, $b)>;
 
+// Test equal arguments checks are applied before user provided constraints.
+def AssertBinOpEqualArgsAndReturnTrue : Constraint<
+  CPred<"assertBinOpEqualArgsAndReturnTrue($0)">>;
+def TestEqualArgsCheckBeforeUserConstraintsPattern :
+  Pat<(OpQ:$op $x, $x),
+      (replaceWithValue $x),
+      [(AssertBinOpEqualArgsAndReturnTrue $op)]>;
+
 // Test for memrefs normalization of an op with normalizable memrefs.
 def OpNorm : TEST_Op<"op_norm", [MemRefsNormalizable]> {
   let arguments = (ins AnyMemRef:$X, AnyMemRef:$Y);
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index f8b5144..ee4fa39 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -70,6 +70,16 @@ static Attribute opMTest(PatternRewriter &rewriter, Value val) {
   return rewriter.getIntegerAttr(rewriter.getIntegerType(32), i);
 }
 
+static bool assertBinOpEqualArgsAndReturnTrue(Value v) {
+  Operation *operation = v.getDefiningOp();
+  if (operation->getOperand(0) != operation->getOperand(1)) {
+    // Name binding equality check must happen before user-defined constraints,
+    // thus this must not be triggered.
+    llvm::report_fatal_error("Arguments are not equal");
+  }
+  return true;
+}
+
 namespace {
 #include "TestPatterns.inc"
 } // namespace
diff --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir
index bd55338..ffb78c2 100644
--- a/mlir/test/mlir-tblgen/pattern.mlir
+++ b/mlir/test/mlir-tblgen/pattern.mlir
@@ -156,16 +156,19 @@ func.func @verifyNestedOpEqualArgs(
   // def TestNestedOpEqualArgsPattern :
   //   Pat<(OpN $b, (OpP $a, $b, $c, $d, $e, $f)), (replaceWithValue $b)>;
 
-  // CHECK: %arg1
+  // CHECK: "test.op_o"(%arg1)
   %0 = "test.op_p"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5)
     : (i32, i32, i32, i32, i32, i32) -> (i32)
   %1 = "test.op_n"(%arg1, %0) : (i32, i32) -> (i32)
+  %2 = "test.op_o"(%1) : (i32) -> (i32)
 
-  // CHECK: test.op_p
-  // CHECK: test.op_n
-  %2 = "test.op_p"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5)
+  // CHECK-NEXT: %[[P:.*]] = "test.op_p"
+  // CHECK-NEXT: %[[N:.*]] = "test.op_n"(%arg0, %[[P]])
+  // CHECK-NEXT: "test.op_o"(%[[N]])
+  %3 = "test.op_p"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5)
     : (i32, i32, i32, i32, i32, i32) -> (i32)
-  %3 = "test.op_n"(%arg0, %2) : (i32, i32) -> (i32)
+  %4 = "test.op_n"(%arg0, %3) : (i32, i32) -> (i32)
+  %5 = "test.op_o"(%4) : (i32) -> (i32)
 
   return
 }
@@ -206,6 +209,21 @@ func.func @verifyMultipleEqualArgs(
   return
 }
 
+func.func @verifyEqualArgsCheckBeforeUserConstraints(%arg0: i32, %arg1: f32) {
+  // def TestEqualArgsCheckBeforeUserConstraintsPattern :
+  //   Pat<(OpQ:$op $x, $x),
+  //       (replaceWithValue $x),
+  //       [(AssertBinOpEqualArgsAndReturnTrue $op)]>;
+
+  // CHECK: "test.op_q"(%arg0, %arg1)
+  %0 = "test.op_q"(%arg0, %arg1) : (i32, f32) -> (i32)
+
+  // CHECK: "test.op_q"(%arg1, %arg0)
+  %1 = "test.op_q"(%arg1, %arg0) : (f32, i32) -> (i32)
+
+  return
+}
+
 //===----------------------------------------------------------------------===//
 // Test Symbol Binding
 //===----------------------------------------------------------------------===//
diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp
index 605033d..40bc1a9 100644
--- a/mlir/tools/mlir-tblgen/RewriterGen.cpp
+++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp
@@ -1024,6 +1024,32 @@ void PatternEmitter::emitMatchLogic(DagNode tree, StringRef opName) {
   int depth = 0;
   emitMatch(tree, opName, depth);
 
+  // Some of the operands could be bound to the same symbol name, we need
+  // to enforce equality constraint on those.
+  // This has to happen before user provided constraints, which may assume the
+  // same name checks are already performed, since in the pattern source code
+  // the user provided constraints appear later.
+  // TODO: we should be able to emit equality checks early
+  // and short circuit unnecessary work if vars are not equal.
+  for (auto symbolInfoIt = symbolInfoMap.begin();
+       symbolInfoIt != symbolInfoMap.end();) {
+    auto range = symbolInfoMap.getRangeOfEqualElements(symbolInfoIt->first);
+    auto startRange = range.first;
+    auto endRange = range.second;
+
+    auto firstOperand = symbolInfoIt->second.getVarName(symbolInfoIt->first);
+    for (++startRange; startRange != endRange; ++startRange) {
+      auto secondOperand = startRange->second.getVarName(symbolInfoIt->first);
+      emitMatchCheck(
+          opName,
+          formatv("*{0}.begin() == *{1}.begin()", firstOperand, secondOperand),
+          formatv("\"Operands '{0}' and '{1}' must be equal\"", firstOperand,
+                  secondOperand));
+    }
+
+    symbolInfoIt = endRange;
+  }
+
   for (auto &appliedConstraint : pattern.getConstraints()) {
     auto &constraint = appliedConstraint.constraint;
     auto &entities = appliedConstraint.entities;
@@ -1068,29 +1094,6 @@ void PatternEmitter::emitMatchLogic(DagNode tree, StringRef opName) {
     }
   }
 
-  // Some of the operands could be bound to the same symbol name, we need
-  // to enforce equality constraint on those.
-  // TODO: we should be able to emit equality checks early
-  // and short circuit unnecessary work if vars are not equal.
-  for (auto symbolInfoIt = symbolInfoMap.begin();
-       symbolInfoIt != symbolInfoMap.end();) {
-    auto range = symbolInfoMap.getRangeOfEqualElements(symbolInfoIt->first);
-    auto startRange = range.first;
-    auto endRange = range.second;
-
-    auto firstOperand = symbolInfoIt->second.getVarName(symbolInfoIt->first);
-    for (++startRange; startRange != endRange; ++startRange) {
-      auto secondOperand = startRange->second.getVarName(symbolInfoIt->first);
-      emitMatchCheck(
-          opName,
-          formatv("*{0}.begin() == *{1}.begin()", firstOperand, secondOperand),
-          formatv("\"Operands '{0}' and '{1}' must be equal\"", firstOperand,
-                  secondOperand));
-    }
-
-    symbolInfoIt = endRange;
-  }
-
   LLVM_DEBUG(llvm::dbgs() << "--- done emitting match logic ---\n");
 }